diff --git a/.env.example b/.env.example new file mode 100644 index 0000000..0397146 --- /dev/null +++ b/.env.example @@ -0,0 +1,13 @@ +# Input directory - where the producer will monitor for .txt files +# This can be any directory on your host machine +# Examples: +# - Relative path: ./data/input +# - Absolute path: /home/user/documents/text-files +# - Windows path: C:/Users/username/Documents/text-files +INPUT_DIR=./data/input + +# Output directory - where JSON results will be saved +OUTPUT_DIR=./data/output + +# Configuration file path +CONFIG_FILE=./config.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..bbf4a74 --- /dev/null +++ b/.gitignore @@ -0,0 +1,26 @@ +.idea +.gradle +build/ +*.iml +*.class +*.jar +*.log + +# Data directories (exclude output, keep input with samples) +data/output/*.json + +# Go +vendor/ + +# Python +__pycache__/ +*.pyc +*.pyo +venv/ +.venv/ + +# Docker +.docker/ + +# Environment variables +.env \ No newline at end of file diff --git a/.gradle/8.4/checksums/checksums.lock b/.gradle/8.4/checksums/checksums.lock deleted file mode 100644 index b01eae2..0000000 Binary files a/.gradle/8.4/checksums/checksums.lock and /dev/null differ diff --git a/.gradle/8.4/checksums/md5-checksums.bin b/.gradle/8.4/checksums/md5-checksums.bin deleted file mode 100644 index 1cf5320..0000000 Binary files a/.gradle/8.4/checksums/md5-checksums.bin and /dev/null differ diff --git a/.gradle/8.4/checksums/sha1-checksums.bin b/.gradle/8.4/checksums/sha1-checksums.bin deleted file mode 100644 index d8e573f..0000000 Binary files a/.gradle/8.4/checksums/sha1-checksums.bin and /dev/null differ diff --git a/.gradle/8.4/dependencies-accessors/dependencies-accessors.lock b/.gradle/8.4/dependencies-accessors/dependencies-accessors.lock deleted file mode 100644 index c09e673..0000000 Binary files a/.gradle/8.4/dependencies-accessors/dependencies-accessors.lock and /dev/null differ diff --git a/.gradle/8.4/executionHistory/executionHistory.bin 
b/.gradle/8.4/executionHistory/executionHistory.bin deleted file mode 100644 index 5516540..0000000 Binary files a/.gradle/8.4/executionHistory/executionHistory.bin and /dev/null differ diff --git a/.gradle/8.4/executionHistory/executionHistory.lock b/.gradle/8.4/executionHistory/executionHistory.lock deleted file mode 100644 index b20bb36..0000000 Binary files a/.gradle/8.4/executionHistory/executionHistory.lock and /dev/null differ diff --git a/.gradle/8.4/fileChanges/last-build.bin b/.gradle/8.4/fileChanges/last-build.bin deleted file mode 100644 index f76dd23..0000000 Binary files a/.gradle/8.4/fileChanges/last-build.bin and /dev/null differ diff --git a/.gradle/8.4/fileHashes/fileHashes.bin b/.gradle/8.4/fileHashes/fileHashes.bin deleted file mode 100644 index 7d9de1c..0000000 Binary files a/.gradle/8.4/fileHashes/fileHashes.bin and /dev/null differ diff --git a/.gradle/8.4/fileHashes/fileHashes.lock b/.gradle/8.4/fileHashes/fileHashes.lock deleted file mode 100644 index 1c30060..0000000 Binary files a/.gradle/8.4/fileHashes/fileHashes.lock and /dev/null differ diff --git a/.gradle/8.4/fileHashes/resourceHashesCache.bin b/.gradle/8.4/fileHashes/resourceHashesCache.bin deleted file mode 100644 index ea5df03..0000000 Binary files a/.gradle/8.4/fileHashes/resourceHashesCache.bin and /dev/null differ diff --git a/.gradle/8.4/gc.properties b/.gradle/8.4/gc.properties deleted file mode 100644 index e69de29..0000000 diff --git a/.gradle/buildOutputCleanup/buildOutputCleanup.lock b/.gradle/buildOutputCleanup/buildOutputCleanup.lock deleted file mode 100644 index c05759e..0000000 Binary files a/.gradle/buildOutputCleanup/buildOutputCleanup.lock and /dev/null differ diff --git a/.gradle/buildOutputCleanup/cache.properties b/.gradle/buildOutputCleanup/cache.properties deleted file mode 100644 index 674ef95..0000000 --- a/.gradle/buildOutputCleanup/cache.properties +++ /dev/null @@ -1,2 +0,0 @@ -#Tue Apr 09 14:58:45 MSK 2024 -gradle.version=8.4 diff --git 
a/.gradle/buildOutputCleanup/outputFiles.bin b/.gradle/buildOutputCleanup/outputFiles.bin deleted file mode 100644 index 17f893c..0000000 Binary files a/.gradle/buildOutputCleanup/outputFiles.bin and /dev/null differ diff --git a/.gradle/file-system.probe b/.gradle/file-system.probe deleted file mode 100644 index cec7bc0..0000000 Binary files a/.gradle/file-system.probe and /dev/null differ diff --git a/.gradle/vcs-1/gc.properties b/.gradle/vcs-1/gc.properties deleted file mode 100644 index e69de29..0000000 diff --git a/Dockerfile.go b/Dockerfile.go new file mode 100644 index 0000000..b15de41 --- /dev/null +++ b/Dockerfile.go @@ -0,0 +1,21 @@ +FROM golang:1.21-alpine AS builder + +WORKDIR /app + +COPY src/go.mod src/go.sum ./ +RUN go mod download + +COPY src/ ./ + +ARG SERVICE_NAME +RUN CGO_ENABLED=0 GOOS=linux go build -o /app/service ./${SERVICE_NAME} + +FROM alpine:latest + +RUN apk --no-cache add ca-certificates + +WORKDIR /root/ + +COPY --from=builder /app/service . + +CMD ["./service"] diff --git a/Dockerfile.python b/Dockerfile.python new file mode 100644 index 0000000..c579e8d --- /dev/null +++ b/Dockerfile.python @@ -0,0 +1,11 @@ +FROM python:3.10-slim + +WORKDIR /app + +COPY src/workers/sentiment/requirements.txt . + +RUN pip install --no-cache-dir -r requirements.txt + +COPY src/workers/sentiment/worker.py . 
+ +CMD ["python", "-u", "worker.py"] diff --git a/README.md b/README.md index 97f8c85..98999e5 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,4 @@ +[![Review Assignment Due Date](https://classroom.github.com/assets/deadline-readme-button-22041afd0340ce965d47ae6ef1cefeee28c7c493a6346c4f15d667ab976d596c.svg)](https://classroom.github.com/a/QODoQuhO) # Распределенная обработка текстовых данных с использованием брокера сообщений ## Цель задания: diff --git a/config.json b/config.json new file mode 100644 index 0000000..985538a --- /dev/null +++ b/config.json @@ -0,0 +1,30 @@ +{ + "rabbitmq": { + "url": "amqp://guest:guest@rabbitmq:5672/", + "exchange_name": "text_processing", + "exchange_type": "topic", + "tasks_routing_key_prefix": "task", + "results_routing_key": "results" + }, + "workers": { + "word_count_workers": 2, + "top_n_workers": 2, + "sentence_sort_workers": 2, + "sentiment_workers": 2, + "name_replacement_workers": 2 + }, + "processing": { + "top_n_words": 10, + "section_size_chars": 5000 + }, + "producer": { + "monitor_interval_seconds": 3, + "file_ready_check_delay_ms": 500 + }, + "input": { + "data_dir": "/data/input/articles" + }, + "output": { + "results_dir": "/data/output" + } +} diff --git a/data/input/performance-test.txt b/data/input/performance-test.txt new file mode 100644 index 0000000..da57b6e --- /dev/null +++ b/data/input/performance-test.txt @@ -0,0 +1 @@ +This is a test file for performance tracking. The system works great and processes text efficiently. Every component performs well. The distributed architecture enables parallel processing. Workers handle tasks quickly and reliably. diff --git a/data/input/sample-russian.txt b/data/input/sample-russian.txt new file mode 100644 index 0000000..18ed9d0 --- /dev/null +++ b/data/input/sample-russian.txt @@ -0,0 +1,10 @@ +##### +Иванов Иван Иванович +Смирнов Семён Семёнович +##### + +Иванов Иван Иванович работает в крупной технологической компании. 
Работа Иванова связана с распределёнными системами. Ивану Ивановичу нравится программирование на Go и Python. +Вчера Иванов встретился с коллегой для обсуждения нового проекта. Идеи Иванова о внедрении RabbitMQ были хорошо восприняты командой. Все были впечатлены техническими знаниями Ивана. +Руководитель попросил Иванова Ивана Ивановича возглавить новую инициативу. Иван будет отвечать за архитектуру системы. Все согласны, что Иванов - идеальный выбор для этой роли. +В свободное время Иван Иванович любит читать технические книги. Недавно Иванов выступил с докладом на конференции разработчиков. Презентация Ивана была хорошо принята аудиторией. +Коллеги Иванова высоко ценят его опыт. Работа с Ивановым всегда продуктивна. Ивану удаётся находить решения для сложных задач. diff --git a/data/input/sample-with-name-replacement.txt b/data/input/sample-with-name-replacement.txt new file mode 100644 index 0000000..f21d632 --- /dev/null +++ b/data/input/sample-with-name-replacement.txt @@ -0,0 +1,11 @@ +##### +Ivanov Ivan Ivanovich +Smirnov Semyon Semyonovich +##### + +Ivanov Ivan Ivanovich is a talented software engineer who works at a major technology company. He has been developing distributed systems for many years. Ivan Ivanovich is known for his expertise in message queue systems and microservices architecture. +Yesterday, Ivanov met with his colleague to discuss the new project requirements. Mr. Ivanov presented his ideas about implementing a RabbitMQ-based solution. The team was impressed by Ivan's technical knowledge and problem-solving skills. +IVANOV has contributed significantly to open-source projects. His GitHub profile shows numerous repositories related to distributed computing. I. I. Ivanov is respected in the developer community for his clear documentation and helpful code reviews. +The project manager asked Ivanov I. I. to lead the new initiative. Ivan will be responsible for architecting the system and mentoring junior developers. 
Everyone agrees that Ivanov Ivan is the perfect choice for this challenging role. +In his personal life, Ivan Ivanovich enjoys reading technical books and attending conferences. He recently gave a talk about scalable architectures at a major developer conference. The presentation by Ivanov was well-received by the audience. +Looking forward, I.I. Ivanov plans to continue contributing to the field of distributed systems. His passion for technology and dedication to excellence make him an invaluable team member. The future looks bright for both Ivanov and his projects. diff --git a/data/input/sample1.txt b/data/input/sample1.txt new file mode 100644 index 0000000..35071f3 --- /dev/null +++ b/data/input/sample1.txt @@ -0,0 +1,7 @@ +The distributed text processing system is an amazing achievement in modern software engineering. It demonstrates the power of microservices architecture and message-driven design patterns. RabbitMQ enables efficient communication between different components, allowing them to work together seamlessly. +This system processes text data in parallel using multiple workers. Each worker specializes in a specific task, making the overall system more efficient and scalable. The word count worker analyzes the number of words in each section. The top-N words worker identifies the most frequently occurring terms. +Sentiment analysis is crucial for understanding the emotional tone of text. Using advanced machine learning models from Hugging Face, we can determine whether the text expresses positive, negative, or neutral sentiment. This capability has numerous applications in business intelligence and social media monitoring. +The aggregator component plays a vital role in combining results from all workers. It waits for all sections to be processed before merging the data. The merge operation ensures that results are combined in the correct order, maintaining data integrity throughout the pipeline. 
+Modern distributed systems must be fault-tolerant and resilient. Docker containers provide isolation and consistency across different environments. Docker Compose orchestrates multiple services, making deployment simple and repeatable. This architecture supports horizontal scaling by adding more worker instances. +Performance optimization is essential for handling large datasets. The system splits text into manageable sections, enabling parallel processing. Load balancing ensures that work is distributed evenly among available workers. Monitoring and logging help identify bottlenecks and improve system performance. +The future of text processing lies in combining traditional algorithms with deep learning models. Natural language processing continues to evolve with new techniques and approaches. Cloud-native architectures enable systems to scale dynamically based on workload demands. Innovation in this field drives progress across many industries. diff --git a/data/input/sample2.txt b/data/input/sample2.txt new file mode 100644 index 0000000..165e35d --- /dev/null +++ b/data/input/sample2.txt @@ -0,0 +1,5 @@ +Software development is a challenging but rewarding field. Every day brings new problems to solve and opportunities to learn. Developers must constantly adapt to changing technologies and methodologies. The pace of innovation never slows down. +Unfortunately, not all projects succeed. Many fail due to poor planning or communication issues. Technical debt accumulates when shortcuts are taken. Maintenance becomes increasingly difficult over time. These challenges test the resilience of development teams. +However, success is achievable with the right approach. Good documentation helps team members understand complex systems. Code reviews improve quality and share knowledge. Automated testing catches bugs early in the development cycle. These practices lead to better outcomes. +Collaboration tools have transformed how teams work together. 
Version control systems like Git enable parallel development. Issue trackers organize tasks and priorities. Communication platforms facilitate discussion and decision-making. Remote work has become the norm for many organizations. +The satisfaction of solving difficult problems makes the effort worthwhile. Seeing users benefit from your work provides motivation. Building something that improves peoples lives is incredibly fulfilling. This sense of purpose drives developers to excel in their craft. diff --git a/data/input/test-performance.txt b/data/input/test-performance.txt new file mode 100644 index 0000000..35071f3 --- /dev/null +++ b/data/input/test-performance.txt @@ -0,0 +1,7 @@ +The distributed text processing system is an amazing achievement in modern software engineering. It demonstrates the power of microservices architecture and message-driven design patterns. RabbitMQ enables efficient communication between different components, allowing them to work together seamlessly. +This system processes text data in parallel using multiple workers. Each worker specializes in a specific task, making the overall system more efficient and scalable. The word count worker analyzes the number of words in each section. The top-N words worker identifies the most frequently occurring terms. +Sentiment analysis is crucial for understanding the emotional tone of text. Using advanced machine learning models from Hugging Face, we can determine whether the text expresses positive, negative, or neutral sentiment. This capability has numerous applications in business intelligence and social media monitoring. +The aggregator component plays a vital role in combining results from all workers. It waits for all sections to be processed before merging the data. The merge operation ensures that results are combined in the correct order, maintaining data integrity throughout the pipeline. +Modern distributed systems must be fault-tolerant and resilient. 
Docker containers provide isolation and consistency across different environments. Docker Compose orchestrates multiple services, making deployment simple and repeatable. This architecture supports horizontal scaling by adding more worker instances. +Performance optimization is essential for handling large datasets. The system splits text into manageable sections, enabling parallel processing. Load balancing ensures that work is distributed evenly among available workers. Monitoring and logging help identify bottlenecks and improve system performance. +The future of text processing lies in combining traditional algorithms with deep learning models. Natural language processing continues to evolve with new techniques and approaches. Cloud-native architectures enable systems to scale dynamically based on workload demands. Innovation in this field drives progress across many industries. diff --git a/.gradle/8.4/dependencies-accessors/gc.properties b/data/output/.gitkeep similarity index 100% rename from .gradle/8.4/dependencies-accessors/gc.properties rename to data/output/.gitkeep diff --git a/data/output/0000800d9058217f6509d7e63ad475e2de0da611.json b/data/output/0000800d9058217f6509d7e63ad475e2de0da611.json new file mode 100644 index 0000000..902982b --- /dev/null +++ b/data/output/0000800d9058217f6509d7e63ad475e2de0da611.json @@ -0,0 +1,398 @@ +{ + "file_name": "0000800d9058217f6509d7e63ad475e2de0da611.txt", + "total_words": 748, + "top_n_words": [ + { + "word": "the", + "count": 40 + }, + { + "word": "a", + "count": 21 + }, + { + "word": "in", + "count": 20 + }, + { + "word": "was", + "count": 20 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "e", + "count": 14 + }, + { + "word": "it", + "count": 14 + }, + { + "word": "to", + "count": 14 + }, + { + "word": "cigarette", + "count": 13 + }, + { + "word": "fire", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "He .", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": 
"Miss .", + "length": 6 + }, + { + "text": "Fire .", + "length": 6 + }, + { + "text": "Yasin .", + "length": 7 + }, + { + "text": "Philip .", + "length": 8 + }, + { + "text": "’ The .", + "length": 9 + }, + { + "text": "monoxide.", + "length": 9 + }, + { + "text": "With her .", + "length": 10 + }, + { + "text": "it was on fire.", + "length": 15 + }, + { + "text": "But there have .", + "length": 16 + }, + { + "text": "Richard Spillett .", + "length": 18 + }, + { + "text": "before,’ she said.", + "length": 20 + }, + { + "text": "exploded next to me.", + "length": 20 + }, + { + "text": "problem had occurred.", + "length": 21 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Chesterfield, Derbyshire.", + "length": 25 + }, + { + "text": "we would take further action.", + "length": 29 + }, + { + "text": "‘I’ve gone to sleep with it plugged in .", + "length": 44 + }, + { + "text": "'I’ve gone to sleep with it plugged in before.", + "length": 48 + }, + { + "text": "‘I hate to think what would have happened if it had .", + "length": 55 + }, + { + "text": "already ‘exploded into bits and melted into the carpet’.", + "length": 60 + }, + { + "text": "thought to have claimed its first victim in November last year.", + "length": 63 + }, + { + "text": "Taylor, from Leicester, had put her e-cigarette on charge in an .", + "length": 65 + }, + { + "text": "chiefs said the blaze that killed 68-year-old Evelyn Raywood was .", + "length": 66 + }, + { + "text": "battery-powered product is billed as a healthy alternative to real .", + "length": 68 + }, + { + "text": "cigarettes as users inhale nicotine with no tar, tobacco or carbon .", + "length": 68 + }, + { + "text": "son Jake, 11, she threw water on the blaze but the e-cigarette had .", + "length": 68 + }, + { + "text": "auxiliary socket in the back of her Ford Mondeo during a short drive.", + "length": 69 + }, + { + "text": "Le Shirley, from the Royal Society for the Prevention of Accidents, .", + 
"length": 69 + }, + { + "text": "been several cases of the device exploding while on charge and it is .", + "length": 70 + }, + { + "text": "said: 'It is important not to leave e-cigarettes to charge unattended.", + "length": 70 + }, + { + "text": "said the firm sold car adapters that must be used with the device, as .", + "length": 71 + }, + { + "text": "Patel, director of Prestige Vaping, said it had sold thousands of the .", + "length": 71 + }, + { + "text": "same starter pack Miss Taylor bought – and this was the first time a .", + "length": 72 + }, + { + "text": "it was only when she returned to the vehicle by chance that she realised .", + "length": 74 + }, + { + "text": "started by an e-cigarette battery pack left charging at her care home in .", + "length": 74 + }, + { + "text": "stated in the user manual, adding: ‘If the appropriate charger was used .", + "length": 75 + }, + { + "text": "When she arrived home she left it there for a further ten minutes – and .", + "length": 75 + }, + { + "text": "Barmaid Laura Baty was burned when her e-cigarette exploded while it was charging .", + "length": 83 + }, + { + "text": "I hate to think what would have happened if it had exploded next to me' Kim Taylor .", + "length": 84 + }, + { + "text": "London fire chiefs believe this e-cigarette charger may have been behind a blaze in Barking .", + "length": 93 + }, + { + "text": "'If the appropriate charger was used we would take further action' Yasin Patel, Prestige Vaping .", + "length": 97 + }, + { + "text": "Concerns: Miss Taylor, a 54-year-old carer now fears for her family's safety when around the devices .", + "length": 102 + }, + { + "text": "Dangerous: Miss Taylor said the explosion and resulting fire filled the car with foul-smelling smoke .", + "length": 102 + }, + { + "text": "The e-cigarette and charger were made by Prestige Vaping but the car adapter plug was bought elsewhere.", + "length": 103 + }, + { + "text": "An electronic cigarette that exploded while 
charging in a car has prompted fresh safety fears over the product.", + "length": 111 + }, + { + "text": "’ Miss Taylor, who also has a daughter aged 26, claims neither the e-cigarette nor charger came with a manual.", + "length": 112 + }, + { + "text": "Safety fear: Kim Taylor and her son Jake, 11, had to extinguish a fire in her car after an e-cigarette exploded .", + "length": 113 + }, + { + "text": "Laura Baty, 18, managed to quickly turn away from the flames  - but was left with a burnt dress and a injured arm.", + "length": 115 + }, + { + "text": "Earlier this month, a pensioner was engulfed in flames on a ward at the Wythenshawe Hospital in Manchester in another incident.", + "length": 127 + }, + { + "text": "It is believed 65-year-old Jean Booth used an e-cigarette while she was on oxygen in her hospital bed and was left badly burned.", + "length": 128 + }, + { + "text": "Fire chiefs investigating the incident said they believe an e-cigarette could have overheated while it was plugged into a mains socket.", + "length": 135 + }, + { + "text": "'The danger is that people sometimes use incorrect chargers which runs the risk of over-charging, which can potentially have explosive results.", + "length": 143 + }, + { + "text": "Carer Kim Taylor, 54, left the device on charge for just ten minutes before she noticed flames and thick black smoke billowing from the vehicle.", + "length": 144 + }, + { + "text": "Just two weeks earlier, an e-cigarette exploded in a barmaid's face while it was being charged near the bar of a pub in Richmond, North Yorkshire.", + "length": 146 + }, + { + "text": "More than 20 firefighters tackled the fire in a ground floor flat in Barking and rescued a woman, who was taken to hospital with smoke inhalation and shock.", + "length": 156 + }, + { + "text": "' The latest incident comes just a week after fire crews investigating a fire in East London said they believe the blaze was started by an e-cigarette on charge.", + "length": 161 + }, + 
{ + "text": "No one was inside at the time, but the back seat was completely melted and the mother-of-two was horrified at the thought she and her family could have been injured.", + "length": 165 + }, + { + "text": "Following the spate of incidents, fire investigator Charlie Pugsley said: 'People assume e-cigarettes are much safer than ordinary cigarettes, and in most cases they are.", + "length": 170 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.802448034286499 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:31:56.388201473Z", + "first_section_created": "2025-12-23T09:31:56.388623389Z", + "last_section_published": "2025-12-23T09:31:56.3889085Z", + "all_results_received": "2025-12-23T09:31:56.57458655Z", + "output_generated": "2025-12-23T09:31:56.574841359Z", + "total_processing_time_ms": 186, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 185, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:31:56.388623389Z", + "publish_time": "2025-12-23T09:31:56.3889085Z", + "first_worker_start": "2025-12-23T09:31:56.390014442Z", + "last_worker_end": "2025-12-23T09:31:56.573527Z", + "total_journey_time_ms": 184, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:31:56.389906738Z", + "start_time": "2025-12-23T09:31:56.390067044Z", + "end_time": "2025-12-23T09:31:56.390179748Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:31:56.390177Z", + "start_time": "2025-12-23T09:31:56.390329Z", + "end_time": "2025-12-23T09:31:56.573527Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 183 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:31:56.389909338Z", + "start_time": "2025-12-23T09:31:56.390105445Z", + "end_time": "2025-12-23T09:31:56.390306953Z", + 
"queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:31:56.389797634Z", + "start_time": "2025-12-23T09:31:56.390014442Z", + "end_time": "2025-12-23T09:31:56.390093045Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 183, + "min_processing_ms": 183, + "max_processing_ms": 183, + "avg_processing_ms": 183, + "median_processing_ms": 183, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + } + }, + "total_sections": 1, + "average_section_size": 4292, + "slowest_section_id": 0, + "slowest_section_time_ms": 184 + } +} diff --git a/data/output/0000bf554ca24b0c72178403b54c0cca62d9faf8.json b/data/output/0000bf554ca24b0c72178403b54c0cca62d9faf8.json new file mode 100644 index 0000000..7215a88 --- /dev/null +++ b/data/output/0000bf554ca24b0c72178403b54c0cca62d9faf8.json @@ -0,0 +1,426 @@ +{ + "file_name": "0000bf554ca24b0c72178403b54c0cca62d9faf8.txt", + "total_words": 812, + "top_n_words": [ + { + "word": "of", + "count": 32 + }, + { + "word": "the", + "count": 30 + }, + { + "word": 
"and", + "count": 25 + }, + { + "word": "a", + "count": 23 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "he", + "count": 14 + }, + { + "word": "levene", + "count": 14 + }, + { + "word": "his", + "count": 12 + }, + { + "word": "with", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "6million .", + "length": 10 + }, + { + "text": "Alex Ward .", + "length": 11 + }, + { + "text": "Levene’s main .", + "length": 17 + }, + { + "text": "City Football Club.", + "length": 19 + }, + { + "text": "he could not pay them.", + "length": 22 + }, + { + "text": "Switzerland and Israel.", + "length": 23 + }, + { + "text": "seven days,’ she said.", + "length": 24 + }, + { + "text": "to be paid in seven days.", + "length": 25 + }, + { + "text": "investigations into his assets.", + "length": 31 + }, + { + "text": "into further realisable assets.", + "length": 31 + }, + { + "text": "this, his victims have recouped.", + "length": 32 + }, + { + "text": "‘In respect of the realisable .", + "length": 33 + }, + { + "text": "With his network of contacts and .", + "length": 34 + }, + { + "text": "The married father of three took .", + "length": 34 + }, + { + "text": "It is unclear how much, if any of .", + "length": 35 + }, + { + "text": "mining firms Xstrata and Rio Tinto.", + "length": 35 + }, + { + "text": "The fraudster had a fleet of luxury .", + "length": 37 + }, + { + "text": "Levene, a former deputy chairman of .", + "length": 37 + }, + { + "text": "be dealt with by bankruptcy officials.", + "length": 38 + }, + { + "text": "He would take from Peter to pay Paul .", + "length": 38 + }, + { + "text": "She added that anything seized would .", + "length": 38 + }, + { + "text": "Having been told about the seizure of .", + "length": 39 + }, + { + "text": "But he dug an ever-deepening financial .", + "length": 40 + }, + { + "text": "Justine  Davidge, representing the SFO, .", + "length": 42 + 
}, + { + "text": "’ Levene admitted ripping off a series .", + "length": 42 + }, + { + "text": "wealth of £15million to £20million in 2005.", + "length": 45 + }, + { + "text": "‘It may be in the future, Mr Levene could come .", + "length": 50 + }, + { + "text": "’ Levene did not appear in court for the hearing.", + "length": 51 + }, + { + "text": "Nicknamed Beano because of his childhood love of the .", + "length": 54 + }, + { + "text": "which featured a performance by girlband The Saturdays.", + "length": 55 + }, + { + "text": "of obtaining a money transfer by deception, and 12 of fraud.", + "length": 60 + }, + { + "text": "bankruptcy order since October 2009 and there were ongoing .", + "length": 60 + }, + { + "text": "investment deals from which he would take a commission or fee.", + "length": 62 + }, + { + "text": "The Ivy and Le Caprice restaurants in the West End; and Russell .", + "length": 65 + }, + { + "text": "comic book, Levene was a successful City worker with an estimated .", + "length": 67 + }, + { + "text": "strong reputation, he won people’s faith with seemingly concrete .", + "length": 68 + }, + { + "text": "rights-issue releases from  companies such as HSBC, Lloyds TSB and .", + "length": 69 + }, + { + "text": "Levene’s assets, Judge Martin Beddow said: ‘As there is nothing .", + "length": 69 + }, + { + "text": "hole for himself, having to fob off clients and make excuses about why .", + "length": 72 + }, + { + "text": "amount, we suggest the court make a nominal order of £1 to be paid in .", + "length": 72 + }, + { + "text": "told Southwark Crown Court yesterday that Levene had been subject to a .", + "length": 72 + }, + { + "text": "and move the funds between accounts in the financial havens of Jersey, .", + "length": 72 + }, + { + "text": "house was a £2million eight-bedroom property in  Barnet, North London.", + "length": 72 + }, + { + "text": "Bartlett, director of the R3 Investment Group and former owner of Hull .", + "length": 72 + }, + { + 
"text": "cars and spent £588,000 on his second son’s Bar Mitzvah celebration, .", + "length": 73 + }, + { + "text": "founders of the Stagecoach bus and rail group; Richard Caring, owner of .", + "length": 73 + }, + { + "text": "available, I direct the payable amount will be the nominal amount of £1 .", + "length": 74 + }, + { + "text": "of high-fliers, including Sir Brian Souter and his sister Ann Gloag, the .", + "length": 74 + }, + { + "text": "Leyton Orient Football Club, admitted one count of false accounting, one .", + "length": 74 + }, + { + "text": "millions of investors’ funds, promising to invest the money in lucrative .", + "length": 76 + }, + { + "text": "Now, because he is bankrupt, he has been given seven days to pay back a nominal sum of £1.", + "length": 91 + }, + { + "text": "But with interest and lost profits, his clients are believed to be £101,685,406 out of pocket.", + "length": 95 + }, + { + "text": "A city trader who conned millions of pounds from wealthy investors was yesterday ordered to pay back £1.", + "length": 105 + }, + { + "text": "But he was addicted to gambling, spending fortunes on spread betting, and had an insatiable taste  for luxury.", + "length": 111 + }, + { + "text": "He used the money to finance his own lavish lifestyle with private jets, super yachts and round-the-world trips.", + "length": 112 + }, + { + "text": "His gambling was huge, with investigators finding evidence of him blowing £720,000 on a cricket match bet in 2007.", + "length": 115 + }, + { + "text": "Victim: Stagecoach Group's co-founders, brother and sister Sir Brian Souter and Ann Gloag (pictured) lost £10million .", + "length": 119 + }, + { + "text": "The Serious Fraud Office found that Levene had conned £32,352,027 from some of Britain’s most successful businessmen.", + "length": 120 + }, + { + "text": "Living the high life, he had a chauffeur-driven Bentley and went on several holidays a year, each lasting several weeks.", + "length": 120 + }, + { + 
"text": "Investigators found evidence of round-the-world trips, yacht hire and top hotel stays in Australia, South Africa and Israel.", + "length": 124 + }, + { + "text": "Nicholas Levene, 48, was jailed for 13 years last November after he admitted orchestrating a lucrative Ponzi scheme which raked in £316million.", + "length": 144 + }, + { + "text": "High-flyer: Levene conned some of Britain's most successful businessmen while owning this £2million eight-bedroom property in Barnet, North London .", + "length": 149 + }, + { + "text": "Jailed: Levene, nicknamed Beano because of his love of the comic (pictured left on the trading floor in 1990, and right with wife Tracy), was jailed for 13 years in November last year .", + "length": 185 + }, + { + "text": "Must pay £1: Jailed city trader Nicholas Levene (pictured arriving at court in November last year), who conned wealthy investors out of £316million, was ordered to pay the nominal sum because he is bankrupt .", + "length": 210 + }, + { + "text": "Lavish lifestyle: Levene ran a multi-million pound illegal 'Ponzi' fraud scheme which he used to finance private jets (file picture), super yachts, a £150,000-a-year box at Ascot and on hosting £10,000-a-day pheasant shoots .", + "length": 227 + }, + { + "text": "Seeing stars: The fraudster spent £588,000 on his second son's Bar Mitzvah celebration, which featured a performance by girl band The Saturdays (file picture) Spent big: His fraud scheme meant he could pay for a £150,000-a-year box at Ascot (file picture) but with interest and potential profits considered, clients are believed to have lost out by £101.", + "length": 357 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6496451497077942 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:31:56.889690213Z", + "first_section_created": "2025-12-23T09:31:56.890043527Z", + "last_section_published": "2025-12-23T09:31:56.890290636Z", + "all_results_received": 
"2025-12-23T09:31:57.098840754Z", + "output_generated": "2025-12-23T09:31:57.099116065Z", + "total_processing_time_ms": 209, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 208, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:31:56.890043527Z", + "publish_time": "2025-12-23T09:31:56.890290636Z", + "first_worker_start": "2025-12-23T09:31:56.890883459Z", + "last_worker_end": "2025-12-23T09:31:57.09378Z", + "total_journey_time_ms": 203, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:31:56.890871358Z", + "start_time": "2025-12-23T09:31:56.891077866Z", + "end_time": "2025-12-23T09:31:56.891233572Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:31:56.891311Z", + "start_time": "2025-12-23T09:31:56.89147Z", + "end_time": "2025-12-23T09:31:57.09378Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 202 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:31:56.89092396Z", + "start_time": "2025-12-23T09:31:56.891081466Z", + "end_time": "2025-12-23T09:31:56.891278774Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:31:56.890759854Z", + "start_time": "2025-12-23T09:31:56.890883459Z", + "end_time": "2025-12-23T09:31:56.890966162Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 202, + "min_processing_ms": 202, + "max_processing_ms": 
202, + "avg_processing_ms": 202, + "median_processing_ms": 202, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4869, + "slowest_section_id": 0, + "slowest_section_time_ms": 203 + } +} diff --git a/data/output/0000dfd9f52a470b9f29957686c2704b68cd0635.json b/data/output/0000dfd9f52a470b9f29957686c2704b68cd0635.json new file mode 100644 index 0000000..6e0bb95 --- /dev/null +++ b/data/output/0000dfd9f52a470b9f29957686c2704b68cd0635.json @@ -0,0 +1,512 @@ +{ + "file_name": "0000dfd9f52a470b9f29957686c2704b68cd0635.txt", + "total_words": 938, + "top_n_words": [ + { + "word": "her", + "count": 41 + }, + { + "word": "and", + "count": 35 + }, + { + "word": "the", + "count": 32 + }, + { + "word": "to", + "count": 22 + }, + { + "word": "bella", + "count": 20 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "s", + "count": 15 + }, + { + "word": "with", + "count": 15 + }, + { + "word": "a", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "But .", + "length": 5 + }, + { + "text": "She .", + "length": 5 + }, + { + "text": "But .", + "length": 5 + }, + { + "text": "And at .", + "length": 8 + }, + { + "text": "Heat fan.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "'It's sad.", + "length": 10 + }, + { + "text": "Helen Pow .", + 
"length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "chemotherapy.", + "length": 13 + }, + { + "text": "It's very sad.", + "length": 14 + }, + { + "text": "It's very sad...", + "length": 16 + }, + { + "text": "Rhabdomyosarcoma.", + "length": 17 + }, + { + "text": "But rid they did.", + "length": 17 + }, + { + "text": "\" With that, he .", + "length": 17 + }, + { + "text": "SCROLL DOWN FOR VIDEO .", + "length": 23 + }, + { + "text": "10:51 EST, 29 May 2013 .", + "length": 24 + }, + { + "text": "09:16 EST, 29 May 2013 .", + "length": 24 + }, + { + "text": "in more than seven sites.", + "length": 25 + }, + { + "text": "But it's just basketball.", + "length": 25 + }, + { + "text": "And soon she was in remission.", + "length": 30 + }, + { + "text": "Two great teams, we're competing.", + "length": 33 + }, + { + "text": "It puts everything in perspective.", + "length": 34 + }, + { + "text": "and grueling chemotherapy sessions.", + "length": 35 + }, + { + "text": "bowed his head and gave up his spirit.", + "length": 38 + }, + { + "text": "Quoting the bible, they wrote: 'When .", + "length": 38 + }, + { + "text": "'I am able to walk, run and jump again!", + "length": 39 + }, + { + "text": "She went peacefully, surrounded by love...", + "length": 42 + }, + { + "text": "Click here to donate to Bella's charity fund.", + "length": 46 + }, + { + "text": "September 2011, when the cancer attacked her ovary.", + "length": 51 + }, + { + "text": "She won again, however, and was in remission until .", + "length": 52 + }, + { + "text": "At the end of the day, this game is just basketball.", + "length": 52 + }, + { + "text": "what happened to Bella puts everything in perspective.", + "length": 54 + }, + { + "text": "he had received the drink, Jesus said, \"It is finished.", + "length": 55 + }, + { + "text": "' Before adding: 'Bella has now joined Jesus in heaven.", + "length": 55 + }, + { + "text": "' her parents exclaimed for her on her website at the 
time.", + "length": 59 + }, + { + "text": "Bella's parents, Raymond and Shannah, and Bella's younger .", + "length": 59 + }, + { + "text": "Though despite those treatments, Bella's organs began failing.", + "length": 62 + }, + { + "text": "in her brain and she underwent full brain radiation plus more .", + "length": 63 + }, + { + "text": "tumor on her spine and was diagnosed with stage four Alveolar .", + "length": 63 + }, + { + "text": "in 2007, at the tender age of just four, she became paralyzed from a .", + "length": 70 + }, + { + "text": "wore shoes with #LiveLikeBella written on them in honor of the brave .", + "length": 70 + }, + { + "text": "was thrown into a life of chemotherapy - 54 weeks of it - radiation - .", + "length": 71 + }, + { + "text": "the dogged illness returned in April 2009, when doctors found a tumor .", + "length": 71 + }, + { + "text": "'You have a little girl, or any kid that loses her life over an illness.", + "length": 72 + }, + { + "text": "16 weeks - and surgery as doctors battled to rid her little body tumors .", + "length": 73 + }, + { + "text": "sister, Rayna, coined the phrase 'Live Like Bella' during her extensive .", + "length": 73 + }, + { + "text": "And that's what her heartbroken parents asked for most of all on Tuesday.", + "length": 73 + }, + { + "text": "Tuesday night's Miami Heat Vs Pacers game, LeBron James and Dwyane Wade .", + "length": 73 + }, + { + "text": "Tributes: Tributes are piling in for Bella who inspired many with her courage .", + "length": 79 + }, + { + "text": "Hospice: Bella, pictured, had been under hospice care since the beginning of May .", + "length": 82 + }, + { + "text": "But she passed away yesterday after fighting the disease for more than half her young life.", + "length": 91 + }, + { + "text": "' Strong: The little girl loved to watch the Heat play, and copy her idol, LeBron, right, .", + "length": 91 + }, + { + "text": "' Bella was a happy, bubbly toddler who loved Sponge Bob Square Pants and 
playing with Lego.", + "length": 92 + }, + { + "text": "'Please pray for us now for the strength and peace to continue to #LIVELIKEBELLA,' they said.", + "length": 93 + }, + { + "text": "The following January, three new tumors were discovered in her abdomen and she had further treatment.", + "length": 101 + }, + { + "text": "Five times over: Bella had made news for fighting cancer five times in the course of her short life .", + "length": 101 + }, + { + "text": "Bella Rodriguez-Torres, 10, loved nothing more than watching the Heat play basketball with her family.", + "length": 102 + }, + { + "text": "'There are few words that can describe what it feels like to have to make a decision of this magnitude.", + "length": 103 + }, + { + "text": "At the start of May, her parents made the difficult decision to stop treatment and she entered a hospice.", + "length": 105 + }, + { + "text": "Close: Bella Rodriguez-Torres' family, pictured with her, were by her side throughout her grueling battle .", + "length": 107 + }, + { + "text": "'(Dwyane) came to me with the story today, it was a no-brainer,' James said on Tuesday, according to Yahoo Sport.", + "length": 113 + }, + { + "text": "Many well-wishers have donated money to help pay for her care and prayed for the brave little girl and her family.", + "length": 114 + }, + { + "text": "Brave battle: Bella Rodriguez-Torres, pictured left and right, died on Tuesday after a six year battle with cancer .", + "length": 116 + }, + { + "text": "'Bella didn't lose her battle with cancer but instead won the reward of an eternal life,' her mother, Shannah, told CBS4.", + "length": 121 + }, + { + "text": "Shannah and Bella's father, Raymond, broke the sad news on their daughter's Facebook page, which has more than 62,000 followers.", + "length": 128 + }, + { + "text": "her parents have chronicled her progress, and the high highs and extremely low lows have been followed by thousands around the world.", + "length": 133 + }, + { + "text": 
"Heat fan: Basketball star LeBron James wore sneakers, pictured, with Bell's name on them on Tuesday night in the little Heat fan's honor .", + "length": 138 + }, + { + "text": "While it has been painstaking to decide to no longer fight and now just ensure her comfort, we are at peace,' her mother told CBS4 earlier this month.", + "length": 150 + }, + { + "text": "But just a month later, four tumors were found in her pelvis and by January 2013, despite undergoing chemotherapy again, doctors found the tumors had grown.", + "length": 156 + }, + { + "text": "Her kidney and lung functions were greatly affected and her bone marrow was exhausted plus ultrasounds of Bella's lungs showed new disease had appeared on the lining.", + "length": 166 + }, + { + "text": "Bella was diagnosed with an aggressive form of cancer at age four, and astounded doctors when she recovered from the illness - something her parents deemed a miracle.", + "length": 166 + }, + { + "text": "But last year, her tumors returned, and, despite putting up another fierce fight, her condition deteriorated and she slipped away peacefully surrounded by family and friends.", + "length": 174 + }, + { + "text": "Family: Bella Rodriguez-Torres, pictured bottom left in happy times with her father Raymond, mother Shannah and little sister, bottom right, Rayna, was diagnosed with cancer aged four .", + "length": 185 + }, + { + "text": "Miami Heat superstar LeBron James gave a touching tribute to one of his biggest fans last night - wearing her name on his sneakers just hours after she lost a grueling six-year battle with cancer.", + "length": 196 + }, + { + "text": "The seriousness of her condition led doctors to recommend intensive surgery, and in March, Bella traveled from Miami to Houston to have a 16 hour surgery to remove the tumors, her uterus, her ovaries and to have her entire abdomen washed in hot chemo.", + "length": 251 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5555009841918945 + 
}, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:31:57.39107875Z", + "first_section_created": "2025-12-23T09:31:57.391382661Z", + "last_section_published": "2025-12-23T09:31:57.391744675Z", + "all_results_received": "2025-12-23T09:31:57.476526594Z", + "output_generated": "2025-12-23T09:31:57.476827505Z", + "total_processing_time_ms": 85, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 84, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:31:57.391382661Z", + "publish_time": "2025-12-23T09:31:57.391669172Z", + "first_worker_start": "2025-12-23T09:31:57.392431601Z", + "last_worker_end": "2025-12-23T09:31:57.475587Z", + "total_journey_time_ms": 84, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:31:57.3924152Z", + "start_time": "2025-12-23T09:31:57.392560706Z", + "end_time": "2025-12-23T09:31:57.392718912Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:31:57.392681Z", + "start_time": "2025-12-23T09:31:57.392819Z", + "end_time": "2025-12-23T09:31:57.475587Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 82 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:31:57.392451702Z", + "start_time": "2025-12-23T09:31:57.392546805Z", + "end_time": "2025-12-23T09:31:57.392707611Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:31:57.392333697Z", + "start_time": "2025-12-23T09:31:57.392431601Z", + "end_time": "2025-12-23T09:31:57.392535705Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:31:57.391700573Z", + "publish_time": "2025-12-23T09:31:57.391744675Z", + "first_worker_start": "2025-12-23T09:31:57.392351798Z", + 
"last_worker_end": "2025-12-23T09:31:57.458136Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:31:57.392350998Z", + "start_time": "2025-12-23T09:31:57.3923959Z", + "end_time": "2025-12-23T09:31:57.3924075Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:31:57.392779Z", + "start_time": "2025-12-23T09:31:57.392928Z", + "end_time": "2025-12-23T09:31:57.458136Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:31:57.392504804Z", + "start_time": "2025-12-23T09:31:57.392551506Z", + "end_time": "2025-12-23T09:31:57.392595107Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:31:57.392281895Z", + "start_time": "2025-12-23T09:31:57.392351798Z", + "end_time": "2025-12-23T09:31:57.392370799Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 147, + "min_processing_ms": 65, + "max_processing_ms": 82, + "avg_processing_ms": 73, + "median_processing_ms": 82, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, 
+ "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2731, + "slowest_section_id": 0, + "slowest_section_time_ms": 84 + } +} diff --git a/data/output/000128cbd36642ced67ac90bd7d4d1dd5e8cf554.json b/data/output/000128cbd36642ced67ac90bd7d4d1dd5e8cf554.json new file mode 100644 index 0000000..96332ec --- /dev/null +++ b/data/output/000128cbd36642ced67ac90bd7d4d1dd5e8cf554.json @@ -0,0 +1,736 @@ +{ + "file_name": "000128cbd36642ced67ac90bd7d4d1dd5e8cf554.txt", + "total_words": 1359, + "top_n_words": [ + { + "word": "the", + "count": 96 + }, + { + "word": "to", + "count": 41 + }, + { + "word": "of", + "count": 40 + }, + { + "word": "and", + "count": 33 + }, + { + "word": "in", + "count": 30 + }, + { + "word": "a", + "count": 24 + }, + { + "word": "ebola", + "count": 24 + }, + { + "word": "is", + "count": 18 + }, + { + "word": "are", + "count": 16 + }, + { + "word": "drug", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "A .", + "length": 3 + }, + { + "text": "In .", + "length": 4 + }, + { + "text": "Mr .", + "length": 4 + }, + { + "text": "It .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "But .", + "length": 5 + }, + { + "text": "said.", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "This .", + "length": 6 + }, + { + "text": "years.", + "length": 6 + }, + { + "text": "death.", + "length": 6 + }, + { + "text": "Chief .", + "length": 7 + }, + { + "text": "ZMapp, .", + "length": 8 + }, + { + "text": "Despite .", + "length": 9 + }, 
+ { + "text": "'General .", + "length": 10 + }, + { + "text": "Fujifilm .", + "length": 10 + }, + { + "text": "the virus.", + "length": 10 + }, + { + "text": "the virus.", + "length": 10 + }, + { + "text": "Rigorous .", + "length": 10 + }, + { + "text": "sufferers.", + "length": 10 + }, + { + "text": "Favipiravir .", + "length": 13 + }, + { + "text": "work on ebola.", + "length": 14 + }, + { + "text": "Recently, two .", + "length": 15 + }, + { + "text": "ministry in March.", + "length": 18 + }, + { + "text": "Health officials .", + "length": 18 + }, + { + "text": "Several drugs are .", + "length": 19 + }, + { + "text": "Some 910 cases have .", + "length": 21 + }, + { + "text": "Most patients die of .", + "length": 22 + }, + { + "text": "outside the laboratory.", + "length": 23 + }, + { + "text": "their major organs fail.", + "length": 24 + }, + { + "text": "scared to come into work.", + "length": 25 + }, + { + "text": "improved chance of survival.", + "length": 28 + }, + { + "text": "rapidly expanding ebola outbreak.", + "length": 33 + }, + { + "text": "Sierra Leone, Guinea and Liberia.", + "length": 33 + }, + { + "text": "being developed for ebola treatment.", + "length": 36 + }, + { + "text": "But they are still in early stages .", + "length": 36 + }, + { + "text": "standard of healthcare as in the UK.", + "length": 36 + }, + { + "text": "missionaries who contracted the virus.", + "length": 38 + }, + { + "text": "If they are hydrated and the bleeding .", + "length": 39 + }, + { + "text": "Japanese officials today said they are .", + "length": 40 + }, + { + "text": "The company is also in talks with the U.", + "length": 40 + }, + { + "text": "decision by the World Health Organisation.", + "length": 42 + }, + { + "text": "developed by San Diego Mapp Biopharmaceutical.", + "length": 46 + }, + { + "text": "The only other British case was in 1976, when .", + "length": 47 + }, + { + "text": "with environments contaminated with such fluids.", + "length": 48 + }, + { + 
"text": "will significantly boost his chance of survival.", + "length": 48 + }, + { + "text": "virus while trying to save lives in Sierra Leone.", + "length": 49 + }, + { + "text": "hiding infected friends and family in their homes.", + "length": 50 + }, + { + "text": "and theoretically similar effects can be expected on Ebola.", + "length": 59 + }, + { + "text": "promising results, apparently aiding the recovery of two U.", + "length": 59 + }, + { + "text": "Favipiravir is one of only a few drugs that may work on ebola.", + "length": 62 + }, + { + "text": "the grave dangers, he had selflessly volunteered to serve in a .", + "length": 64 + }, + { + "text": "Manufacturers of the drug say stocks of the drug are exhausted .", + "length": 64 + }, + { + "text": "international partners to source remaining doses for the patient.", + "length": 65 + }, + { + "text": "fit, young patients who are given quick treatment have a vastly .", + "length": 65 + }, + { + "text": "function and, in some cases, both internal and external bleeding.", + "length": 65 + }, + { + "text": "Pooley is the only Briton to be have ever been infected by ebola .", + "length": 66 + }, + { + "text": "and there is no proven treatment or vaccine for the highly fatal .", + "length": 66 + }, + { + "text": "strain due to the Ebola outbreak, and unable to provide the same .", + "length": 66 + }, + { + "text": "inhibits viral gene replication within infected cells to prevent .", + "length": 66 + }, + { + "text": "and Nancy Writebol recovered from Ebola after receiving the serum.", + "length": 66 + }, + { + "text": "the number of fresh graves as a rough estimate of suspected cases.", + "length": 66 + }, + { + "text": "source at the Department of Health said: 'We are working with our .", + "length": 67 + }, + { + "text": "comes as a the first British victim of the outbreak, nurse William .", + "length": 68 + }, + { + "text": "re-emerging influenza viruses, was approved by the Japanese health .", + "length": 68 + 
}, + { + "text": "week which urged people to carefully assess their need to travel to .", + "length": 69 + }, + { + "text": "is followed by vomiting, diarrhoea, rash, impaired kidney and liver .", + "length": 69 + }, + { + "text": "an untested drug only ever used on a handful of patients, has shown .", + "length": 69 + }, + { + "text": "Fujifilm said it has favipiravir stock for more than 20,000 patients.", + "length": 69 + }, + { + "text": "makeshift clinic where other nurses had died from ebola or were too .", + "length": 69 + }, + { + "text": "individual requests for the favipiravir tablet, before any official .", + "length": 69 + }, + { + "text": "magnitude of the outbreak has been underestimated because people are .", + "length": 70 + }, + { + "text": "cabinet secretary Yoshihide Suga said Japan may respond to emergency .", + "length": 70 + }, + { + "text": "notifying health officials and with no investigation of the cause of .", + "length": 70 + }, + { + "text": "WHO reported that corpses are being buried in rural villages without .", + "length": 70 + }, + { + "text": "disease, and Fujifilm's drug is one of only a few new drugs that may .", + "length": 70 + }, + { + "text": "He said the drug has also proved effective in lab experiments on mice.", + "length": 70 + }, + { + "text": "quarantine measures are used to stop the spread of ebola, as well as .", + "length": 70 + }, + { + "text": "caught hiding an Ebola patient can receive prison terms of up to two .", + "length": 70 + }, + { + "text": "high standards of hygiene for anyone who might come into contact with .", + "length": 71 + }, + { + "text": "WHO said the disease can be passed between people by direct contact - .", + "length": 71 + }, + { + "text": "Pooley, 29, was airlifted back to London after contracting the deadly .", + "length": 71 + }, + { + "text": "some instances epidemiologists have travelled to villages and counted .", + "length": 71 + }, + { + "text": "medical facilities throughout Sierra 
Leone are currently under severe .", + "length": 71 + }, + { + "text": "ebola because of dehydration, when their blood vessels break down and .", + "length": 71 + }, + { + "text": "through broken skin or mucous membranes - with the blood, secretions, .", + "length": 71 + }, + { + "text": "organs or other bodily fluids of infected people, and indirect contact .", + "length": 72 + }, + { + "text": "Sierra Leone parliament has voted to pass a new law which means anyone .", + "length": 72 + }, + { + "text": "of new viral particles to prevent the spread of infection, the company .", + "length": 72 + }, + { + "text": "controlled, their immune systems can in some cases kick in, and defeat .", + "length": 72 + }, + { + "text": "It is aimed at boosting the immune system's efforts to fight the disease.", + "length": 73 + }, + { + "text": "last night said Mr Pooley was 'not seriously unwell' – a factor which .", + "length": 73 + }, + { + "text": "ready to provide an anti-flu drug as a potential treatment to fight the .", + "length": 73 + }, + { + "text": "spokesman Takao Aoki said ebola and influenza viruses are the same type .", + "length": 73 + }, + { + "text": "drug, developed by a subsidiary of Fujifilm Holdings to treat novel and .", + "length": 73 + }, + { + "text": "It is available at any time, at the request of WHO, officials said today.", + "length": 73 + }, + { + "text": "Foreign and Commonwealth Office (FCO) issued updated travel advice last .", + "length": 73 + }, + { + "text": "scientist Geoffrey Platt pricked himself with a needle contaminated with .", + "length": 74 + }, + { + "text": "propagation, while conventional ones are designed to inhibit the release .", + "length": 74 + }, + { + "text": "been recorded in the country - and 392 deaths - but the WHO believes the .", + "length": 74 + }, + { + "text": "'Dedicated healthcare facilities for Ebola are overwhelmed,' the FCO warned.", + "length": 76 + }, + { + "text": "Food and Drug Administration on clinical 
testing of the drug in treating Ebola.", + "length": 79 + }, + { + "text": "The effects of the disease normally appear between two and 21 days after infection.", + "length": 83 + }, + { + "text": "The patient was flown into an airport near London, then driven across the capital .", + "length": 83 + }, + { + "text": "' The WHO has put the number of people infected with the deadly ebola virus at 2,615.", + "length": 85 + }, + { + "text": "But Mapp Biopharmaceutical, the American company behind it, says stocks are exhausted.", + "length": 86 + }, + { + "text": "Until the drug was given to the American Ebola sufferers, it had only been tested on monkeys.", + "length": 93 + }, + { + "text": "It is only available in limited supplies as larger scale human testing is needed to prove it safe and effective.", + "length": 112 + }, + { + "text": "Symptoms of the virus appear as a sudden onset of fever, intense weakness, muscle pain, headache and sore throat.", + "length": 113 + }, + { + "text": "Aid worker Nancy Writebol, pictured with her husband David, was also treated at the Atlanta hospital using Zmapp.", + "length": 113 + }, + { + "text": "Last night the NHS started a global hunt for remaining supplies of the only treatment thought to combat the virus.", + "length": 114 + }, + { + "text": "Treatment: The hospital is the only centre in the UK equipped to treat ebola and prevent the disease from spreading .", + "length": 117 + }, + { + "text": "It is transmitted to people from wild animals and spreads in the human population through person-to-person transmission.", + "length": 120 + }, + { + "text": "Mr Pooley last night arrived back to London in an isolation sack for treatment at the Royal Free Hospital in north London.", + "length": 122 + }, + { + "text": "The drug had never been tested on humans, although an early version had been found to work in some ebola-infected monkeys.", + "length": 122 + }, + { + "text": "Americans, Dr Kent Brantly and aid worker Nancy Writebol, 
have been treated with another experimental drug called ZMapp, .", + "length": 122 + }, + { + "text": "Pictures have emerged of the moment a British charity worker was evacuated back to the UK on board a Royal Air Force jet .", + "length": 122 + }, + { + "text": "Some 1,427 have died since the disease was identified in Guinea in March, before spreading to Sierra Leone, Liberia and Nigeria.", + "length": 128 + }, + { + "text": "' The WHO said earlier this month that it is ethical to use untested drugs on ebola patients given the magnitude of the outbreak.", + "length": 129 + }, + { + "text": "Male nurse William Pooley, 29, was airlifted back to London after contracting the deadly virus while trying to save lives in Sierra Leone .", + "length": 139 + }, + { + "text": "The companies said in a statement: 'It is important to note that the emergency use of an experimental medicine is a highly unusual situation.", + "length": 141 + }, + { + "text": "The companies behind the drug, ZMapp and Kentucky BioProcessing are currently working to have the product approved and the production accelerated.", + "length": 146 + }, + { + "text": "'As a consequence global high-level discussions concerning the policy, ethical, and medical implications of this exceptional situation have been initiated.", + "length": 155 + }, + { + "text": "The British man infected with the Ebola virus is loaded into an Royal Air Force ambulance after being flown home on a C17 plane from Sierra Leone, at Northolt air base outside London .", + "length": 184 + }, + { + "text": "doctor Kent Brantly is pictured saying goodbye to the team that saved him at Emory University Hospital in Atlanta, Georgia after he was successfully treated for ebola using the experimental drug Zmapp .", + "length": 202 + }, + { + "text": "A Japanese firm, a subsidiary of Fujifilm Holdings, said it has 20,000 doses of the flu drug favipiravir available to treat ebola victims, after the drug proved effective treating mice infected with 
the deadly virus, pictured .", + "length": 227 + }, + { + "text": "British charity worker, Mr Pooley, from Woodbridge in Suffolk, was diagnosed with ebola and airlifted out of Sierra Leone and taken to the High Security Infectious Disease Unit at the Royal Free Hospital in London (pictured) Experimental serum ZMapp was created from a combination of antibodies in January.", + "length": 306 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4859484136104584 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:31:57.892498787Z", + "first_section_created": "2025-12-23T09:31:57.892935904Z", + "last_section_published": "2025-12-23T09:31:57.893382021Z", + "all_results_received": "2025-12-23T09:31:57.997642179Z", + "output_generated": "2025-12-23T09:31:57.99793799Z", + "total_processing_time_ms": 105, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 104, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:31:57.892935904Z", + "publish_time": "2025-12-23T09:31:57.893229715Z", + "first_worker_start": "2025-12-23T09:31:57.894284655Z", + "last_worker_end": "2025-12-23T09:31:57.99672Z", + "total_journey_time_ms": 103, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:31:57.894218552Z", + "start_time": "2025-12-23T09:31:57.894284655Z", + "end_time": "2025-12-23T09:31:57.894391859Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:31:57.894517Z", + "start_time": "2025-12-23T09:31:57.894664Z", + "end_time": "2025-12-23T09:31:57.99672Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 102 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:31:57.894328757Z", + "start_time": "2025-12-23T09:31:57.894395759Z", + "end_time": "2025-12-23T09:31:57.894534064Z", + "queue_wait_time_ms": 1, + 
"processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:31:57.894284655Z", + "start_time": "2025-12-23T09:31:57.894347657Z", + "end_time": "2025-12-23T09:31:57.89441436Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:31:57.893281417Z", + "publish_time": "2025-12-23T09:31:57.893382021Z", + "first_worker_start": "2025-12-23T09:31:57.894335457Z", + "last_worker_end": "2025-12-23T09:31:57.987023Z", + "total_journey_time_ms": 93, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:31:57.894354758Z", + "start_time": "2025-12-23T09:31:57.89440666Z", + "end_time": "2025-12-23T09:31:57.894537765Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:31:57.894519Z", + "start_time": "2025-12-23T09:31:57.894672Z", + "end_time": "2025-12-23T09:31:57.987023Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 92 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:31:57.894403259Z", + "start_time": "2025-12-23T09:31:57.894454661Z", + "end_time": "2025-12-23T09:31:57.895142188Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:31:57.894265854Z", + "start_time": "2025-12-23T09:31:57.894335457Z", + "end_time": "2025-12-23T09:31:57.894391459Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 194, + 
"min_processing_ms": 92, + "max_processing_ms": 102, + "avg_processing_ms": 97, + "median_processing_ms": 102, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4181, + "slowest_section_id": 0, + "slowest_section_time_ms": 103 + } +} diff --git a/data/output/0001d1afc246a7964130f43ae940af6bc6c57f01.json b/data/output/0001d1afc246a7964130f43ae940af6bc6c57f01.json new file mode 100644 index 0000000..e5cf0da --- /dev/null +++ b/data/output/0001d1afc246a7964130f43ae940af6bc6c57f01.json @@ -0,0 +1,672 @@ +{ + "file_name": "0001d1afc246a7964130f43ae940af6bc6c57f01.txt", + "total_words": 1582, + "top_n_words": [ + { + "word": "the", + "count": 90 + }, + { + "word": "to", + "count": 53 + }, + { + "word": "s", + "count": 37 + }, + { + "word": "a", + "count": 36 + }, + { + "word": "in", + "count": 30 + }, + { + "word": "of", + "count": 29 + }, + { + "word": "obama", + "count": 28 + }, + { + "word": "and", + "count": 27 + }, + { + "word": "said", + "count": 26 + }, + { + "word": "syria", + "count": 25 + } + ], + "sorted_sentences": [ + { + "text": "N.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 
2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "\" U.", + "length": 4 + }, + { + "text": "Top U.", + "length": 6 + }, + { + "text": "Map: U.", + "length": 7 + }, + { + "text": "action.", + "length": 7 + }, + { + "text": "Some U.", + "length": 7 + }, + { + "text": "Iran: U.", + "length": 8 + }, + { + "text": "It's unclear.", + "length": 13 + }, + { + "text": "British and U.", + "length": 14 + }, + { + "text": "Security Council.", + "length": 17 + }, + { + "text": "It's official: U.", + "length": 17 + }, + { + "text": "5 key assertions: U.", + "length": 20 + }, + { + "text": "Robert Menendez said.", + "length": 21 + }, + { + "text": "ground forces, he said.", + "length": 23 + }, + { + "text": "inspectors leave Syria .", + "length": 24 + }, + { + "text": "\"So we are quite concerned.", + "length": 27 + }, + { + "text": "Syria's government unfazed .", + "length": 28 + }, + { + "text": "\"Like many other Americans...", + "length": 29 + }, + { + "text": "intelligence report on Syria .", + "length": 30 + }, + { + "text": "and allied assets around Syria .", + "length": 32 + }, + { + "text": "What do Syria's neighbors think?", + "length": 32 + }, + { + "text": "What will happen if they vote no?", + "length": 33 + }, + { + "text": "weapons inspectors find in Syria?", + "length": 33 + }, + { + "text": "Obama decision came Friday night .", + "length": 34 + }, + { + "text": "What happens if Congress votes no?", + "length": 34 + }, + { + "text": "Reactions mixed to Obama's speech .", + "length": 35 + }, + { + "text": "Syrian crisis: Latest developments .", + "length": 36 + }, + { + 
"text": "Obama's remarks came shortly after U.", + "length": 37 + }, + { + "text": "Transcript: Read Obama's full remarks .", + "length": 39 + }, + { + "text": "Why Russia, China, Iran stand by Assad .", + "length": 40 + }, + { + "text": "And how will the Syrian government react?", + "length": 41 + }, + { + "text": "\" Syria missile strike: What would happen next?", + "length": 47 + }, + { + "text": "No explanation was offered for the discrepancy.", + "length": 47 + }, + { + "text": "The inspectors will share their findings with U.", + "length": 48 + }, + { + "text": "\" Bergen: Syria is a problem from hell for the U.", + "length": 50 + }, + { + "text": "military officials said they remained at the ready.", + "length": 51 + }, + { + "text": "spokesman Martin Nesirky told reporters on Saturday.", + "length": 52 + }, + { + "text": "Syria: Who wants what after chemical weapons horror .", + "length": 53 + }, + { + "text": "\" Some members of Congress applauded Obama's decision.", + "length": 54 + }, + { + "text": "Any military attack would not be open-ended or include U.", + "length": 57 + }, + { + "text": "There are key questions looming over the debate: What did U.", + "length": 60 + }, + { + "text": "In a world with many dangers, this menace must be confronted.", + "length": 61 + }, + { + "text": "Syria's prime minister appeared unfazed by the saber-rattling.", + "length": 62 + }, + { + "text": "team's final report is completed before presenting it to the U.", + "length": 63 + }, + { + "text": "intelligence reports say the attack involved chemical weapons, but U.", + "length": 69 + }, + { + "text": "Meanwhile, as uncertainty loomed over how Congress would weigh in, U.", + "length": 69 + }, + { + "text": "On Saturday, Obama said \"all told, well over 1,000 people were murdered.", + "length": 72 + }, + { + "text": "\"I understand and support Barack Obama's position on Syria,\" Cameron said.", + "length": 74 + }, + { + "text": "Secretary-General Ban Ki-moon Ban, who 
has said he wants to wait until the U.", + "length": 77 + }, + { + "text": "On Friday night, the president made a last-minute decision to consult lawmakers.", + "length": 80 + }, + { + "text": "\"We should have this debate, because the issues are too big for business as usual.", + "length": 82 + }, + { + "text": "\"It needs time to be able to analyze the information and the samples,\" Nesirky said.", + "length": 84 + }, + { + "text": "Obama on Saturday continued to shore up support for a strike on the al-Assad government.", + "length": 88 + }, + { + "text": "He spoke by phone with French President Francois Hollande before his Rose Garden speech.", + "length": 88 + }, + { + "text": "British intelligence had put the number of people killed in the attack at more than 350.", + "length": 88 + }, + { + "text": "officials have stressed the importance of waiting for an official report from inspectors.", + "length": 89 + }, + { + "text": "The Senate Foreign Relations Committee will hold a hearing over the matter on Tuesday, Sen.", + "length": 91 + }, + { + "text": "President Barack Obama wants lawmakers to weigh in on whether to use military force in Syria.", + "length": 93 + }, + { + "text": "military action in Syria would spark 'disaster' Opinion: Why strikes in Syria are a bad idea .", + "length": 94 + }, + { + "text": "Both Obama and Cameron, he said, \"climbed to the top of the tree and don't know how to get down.", + "length": 96 + }, + { + "text": "\" It's a step that is set to turn an international crisis into a fierce domestic political battle.", + "length": 98 + }, + { + "text": "Secretary of State John Kerry on Friday cited a death toll of 1,429, more than 400 of them children.", + "length": 100 + }, + { + "text": "lawmakers have called for immediate action while others warn of stepping into what could become a quagmire.", + "length": 107 + }, + { + "text": "In the United States, scattered groups of anti-war protesters around the country took to the streets 
Saturday.", + "length": 110 + }, + { + "text": "\"Under the Constitution, the responsibility to declare war lies with Congress,\" the Republican lawmakers said.", + "length": 110 + }, + { + "text": "Syria's alleged use of chemical weapons earlier this month \"is an assault on human dignity,\" the president said.", + "length": 112 + }, + { + "text": "On Saturday, Obama proposed what he said would be a limited military action against Syrian President Bashar al-Assad.", + "length": 117 + }, + { + "text": "An influential lawmaker in Russia -- which has stood by Syria and criticized the United States -- had his own theory.", + "length": 117 + }, + { + "text": "A spokesman for the Syrian National Coalition said that the opposition group was disappointed by Obama's announcement.", + "length": 118 + }, + { + "text": "\" Obama said top congressional leaders had agreed to schedule a debate when the body returns to Washington on September 9.", + "length": 122 + }, + { + "text": "An anchor on Syrian state television said Obama \"appeared to be preparing for an aggression on Syria based on repeated lies.", + "length": 124 + }, + { + "text": "\"The aim of the game here, the mandate, is very clear -- and that is to ascertain whether chemical weapons were used -- and not by whom,\" U.", + "length": 140 + }, + { + "text": "\"We are glad the president is seeking authorization for any military action in Syria in response to serious, substantive questions being raised.", + "length": 144 + }, + { + "text": "\"Our fear now is that the lack of action could embolden the regime and they repeat his attacks in a more serious way,\" said spokesman Louay Safi.", + "length": 145 + }, + { + "text": "inspectors left Syria, carrying evidence that will determine whether chemical weapons were used in an attack early last week in a Damascus suburb.", + "length": 146 + }, + { + "text": "But who used the weapons in the reported toxic gas attack in a Damascus suburb on August 21 has been a key point of 
global debate over the Syrian crisis.", + "length": 153 + }, + { + "text": "He noted that Ban has repeatedly said there is no alternative to a political solution to the crisis in Syria, and that \"a military solution is not an option.", + "length": 157 + }, + { + "text": "\" More than 160 legislators, including 63 of Obama's fellow Democrats, had signed letters calling for either a vote or at least a \"full debate\" before any U.", + "length": 157 + }, + { + "text": "\"I think he has done well by doing what Cameron did in terms of taking the issue to Parliament,\" said Bashar Jaafari, Syria's ambassador to the United Nations.", + "length": 159 + }, + { + "text": "A senior administration official told CNN that Obama has the authority to act without Congress -- even if Congress rejects his request for authorization to use force.", + "length": 166 + }, + { + "text": "A failure to respond with force, Obama argued, \"could lead to escalating use of chemical weapons or their proliferation to terrorist groups who would do our people harm.", + "length": 170 + }, + { + "text": "we're just tired of the United States getting involved and invading and bombing other countries,\" said Robin Rosecrans, who was among hundreds at a Los Angeles demonstration.", + "length": 174 + }, + { + "text": "officials have said there's no doubt that the Syrian government was behind it, while Syrian officials have denied responsibility and blamed jihadists fighting with the rebels.", + "length": 175 + }, + { + "text": "In a televised address from the White House Rose Garden earlier Saturday, the president said he would take his case to Congress, not because he has to -- but because he wants to.", + "length": 178 + }, + { + "text": "House Speaker John Boehner, Majority Leader Eric Cantor, Majority Whip Kevin McCarthy and Conference Chair Cathy McMorris Rodgers issued a statement Saturday praising the president.", + "length": 181 + }, + { + "text": "The Organization for the Prohibition of Chemical 
Weapons, which nine of the inspectors belong to, said Saturday that it could take up to three weeks to analyze the evidence they collected.", + "length": 188 + }, + { + "text": "\" A top Syrian diplomat told the state television network that Obama was facing pressure to take military action from Israel, Turkey, some Arabs and right-wing extremists in the United States.", + "length": 192 + }, + { + "text": "Some global leaders have expressed support, but the British Parliament's vote against military action earlier this week was a blow to Obama's hopes of getting strong backing from key NATO allies.", + "length": 195 + }, + { + "text": "\" The Syrian government has denied that it used chemical weapons in the August 21 attack, saying that jihadists fighting with the rebels used them in an effort to turn global sentiments against it.", + "length": 197 + }, + { + "text": "British Prime Minister David Cameron, whose own attempt to get lawmakers in his country to support military action in Syria failed earlier this week, responded to Obama's speech in a Twitter post Saturday.", + "length": 205 + }, + { + "text": "Obama: 'This menace must be confronted' Obama's senior advisers have debated the next steps to take, and the president's comments Saturday came amid mounting political pressure over the situation in Syria.", + "length": 205 + }, + { + "text": "The proposed legislation from Obama asks Congress to approve the use of military force \"to deter, disrupt, prevent and degrade the potential for future uses of chemical weapons or other weapons of mass destruction.", + "length": 214 + }, + { + "text": "Obama sent a letter to the heads of the House and Senate on Saturday night, hours after announcing that he believes military action against Syrian targets is the right step to take over the alleged use of chemical weapons.", + "length": 222 + }, + { + "text": "\"While I believe I have the authority to carry out this military action without specific congressional 
authorization, I know that the country will be stronger if we take this course, and our actions will be even more effective,\" he said.", + "length": 237 + }, + { + "text": "deliver a resolute message to the Assad regime -- and others who would consider using chemical weapons -- that these crimes are unacceptable and those who violate this international norm will be held accountable by the world,\" the White House said.", + "length": 248 + }, + { + "text": "After Obama's speech, a military and political analyst on Syrian state TV said Obama is \"embarrassed\" that Russia opposes military action against Syria, is \"crying for help\" for someone to come to his rescue and is facing two defeats -- on the political and military levels.", + "length": 274 + }, + { + "text": "\"The Syrian Army's status is on maximum readiness and fingers are on the trigger to confront all challenges,\" Wael Nader al-Halqi said during a meeting with a delegation of Syrian expatriates from Italy, according to a banner on Syria State TV that was broadcast prior to Obama's address.", + "length": 288 + }, + { + "text": "\"The main reason Obama is turning to the Congress: the military operation did not get enough support either in the world, among allies of the US or in the United States itself,\" Alexei Pushkov, chairman of the international-affairs committee of the Russian State Duma, said in a Twitter post.", + "length": 293 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6240751296281815 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:31:58.394181035Z", + "first_section_created": "2025-12-23T09:31:58.394556549Z", + "last_section_published": "2025-12-23T09:31:58.395013866Z", + "all_results_received": "2025-12-23T09:31:58.513232155Z", + "output_generated": "2025-12-23T09:31:58.513525466Z", + "total_processing_time_ms": 119, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 118, + "aggregation_wait_time_ms": 0, + 
"section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:31:58.394556549Z", + "publish_time": "2025-12-23T09:31:58.39485836Z", + "first_worker_start": "2025-12-23T09:31:58.395407681Z", + "last_worker_end": "2025-12-23T09:31:58.512414Z", + "total_journey_time_ms": 117, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:31:58.395617989Z", + "start_time": "2025-12-23T09:31:58.395751894Z", + "end_time": "2025-12-23T09:31:58.395850098Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:31:58.395875Z", + "start_time": "2025-12-23T09:31:58.39607Z", + "end_time": "2025-12-23T09:31:58.512414Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 116 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:31:58.395471984Z", + "start_time": "2025-12-23T09:31:58.395561387Z", + "end_time": "2025-12-23T09:31:58.395706193Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:31:58.395339179Z", + "start_time": "2025-12-23T09:31:58.395407681Z", + "end_time": "2025-12-23T09:31:58.395521386Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:31:58.394901862Z", + "publish_time": "2025-12-23T09:31:58.395013866Z", + "first_worker_start": "2025-12-23T09:31:58.39564509Z", + "last_worker_end": "2025-12-23T09:31:58.462467Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:31:58.395723193Z", + "start_time": "2025-12-23T09:31:58.395771295Z", + "end_time": "2025-12-23T09:31:58.395852298Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:31:58.395889Z", + "start_time": 
"2025-12-23T09:31:58.396009Z", + "end_time": "2025-12-23T09:31:58.462467Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:31:58.395663991Z", + "start_time": "2025-12-23T09:31:58.395741194Z", + "end_time": "2025-12-23T09:31:58.395954502Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:31:58.395585988Z", + "start_time": "2025-12-23T09:31:58.39564509Z", + "end_time": "2025-12-23T09:31:58.395748794Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 182, + "min_processing_ms": 66, + "max_processing_ms": 116, + "avg_processing_ms": 91, + "median_processing_ms": 116, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4698, + "slowest_section_id": 0, + "slowest_section_time_ms": 117 + } +} diff --git a/data/output/0001d4ce3598e37f20a47fe609736f72e5d73467.json b/data/output/0001d4ce3598e37f20a47fe609736f72e5d73467.json new file mode 100644 
index 0000000..c001452 --- /dev/null +++ b/data/output/0001d4ce3598e37f20a47fe609736f72e5d73467.json @@ -0,0 +1,524 @@ +{ + "file_name": "0001d4ce3598e37f20a47fe609736f72e5d73467.txt", + "total_words": 943, + "top_n_words": [ + { + "word": "the", + "count": 65 + }, + { + "word": "to", + "count": 28 + }, + { + "word": "a", + "count": 27 + }, + { + "word": "and", + "count": 22 + }, + { + "word": "girl", + "count": 22 + }, + { + "word": "was", + "count": 21 + }, + { + "word": "her", + "count": 15 + }, + { + "word": "vacca", + "count": 15 + }, + { + "word": "an", + "count": 13 + }, + { + "word": "of", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "As .", + "length": 4 + }, + { + "text": "Sam .", + "length": 5 + }, + { + "text": "Liz .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "Then, .", + "length": 7 + }, + { + "text": "target.", + "length": 7 + }, + { + "text": "Social .", + "length": 8 + }, + { + "text": "McCabe .", + "length": 8 + }, + { + "text": "Charles .", + "length": 9 + }, + { + "text": "injuries.", + "length": 9 + }, + { + "text": "Charles .", + "length": 9 + }, + { + "text": "However, .", + "length": 10 + }, + { + "text": "Horrifying.", + "length": 11 + }, + { + "text": "An employee, .", + "length": 14 + }, + { + "text": "automatic weapon.", + "length": 17 + }, + { + "text": "' Last week, the .", + "length": 18 + }, + { + "text": "'You know this was a .", + "length": 22 + }, + { + "text": "the tragedy unfolded .", + "length": 22 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "girl fire an automatic weapon.", + "length": 30 + }, + { + "text": "He passed away on Monday night .", + "length": 32 + }, + { + "text": "she fired on full automatic mode.", + "length": 33 + }, + { + "text": "Bullets would have been appalling enough.", + "length": 41 + }, + { + "text": "on Monday when she lost control of the gun.", + "length": 43 + }, + { + "text": "' Instructions: Charles Vacca, a father 
and .", + "length": 45 + }, + { + "text": "It does not show the moment the girl shot him .", + "length": 47 + }, + { + "text": "First shot then pause: The nine-year-old fires .", + "length": 48 + }, + { + "text": "Clark County Coroner Mike Murphy told NBC News .", + "length": 48 + }, + { + "text": "His last words to the girl are: 'Alright, full auto.", + "length": 52 + }, + { + "text": "big, something that was high on her bucket list to do.", + "length": 54 + }, + { + "text": "Scarmardo owns Arizona's Bullets and Burgers Adventure.", + "length": 55 + }, + { + "text": "the head when the young girl lost control of the weapon.", + "length": 56 + }, + { + "text": "and her parents were treating her,' Scarmardo told MSNBC.", + "length": 57 + }, + { + "text": "Bullets and Burgers is licensed and legal as an operation.", + "length": 58 + }, + { + "text": "owner of the Arizona shooting range where a 9-year-old girl .", + "length": 61 + }, + { + "text": "it was a single round that was the 'immediate cause of death.", + "length": 61 + }, + { + "text": "Then Vacca asks her to fire one shot for him, which she does.", + "length": 61 + }, + { + "text": "to handle the Uzi in a 22-second video clip released by police.", + "length": 63 + }, + { + "text": "Arizona has no law establishing a minimum age to operate a gun.", + "length": 63 + }, + { + "text": "out to defend letting a 9-year-old fire an Uzi, after the girl .", + "length": 64 + }, + { + "text": "Vacca, a father and veteran from Lake Havasu City, Arizona, was .", + "length": 65 + }, + { + "text": "posting comments as to why a nine-year-old was allowed to fire an .", + "length": 67 + }, + { + "text": "media was incredulous about the tragic accident, with most people .", + "length": 67 + }, + { + "text": "'This was a very mature young lady and something she wanted to do .", + "length": 67 + }, + { + "text": "weapon was set to 'full-automatic mode' and caused Vacca his fatal .", + "length": 68 + }, + { + "text": "Vacca tells 
the girl to adjust her stance and squeeze the trigger to .", + "length": 70 + }, + { + "text": "shocking recording taken by the girl's parents and released by police .", + "length": 71 + }, + { + "text": "one shot as instructed and then prepares to fire again (right) before .", + "length": 71 + }, + { + "text": "that while an autopsy on Vaca was not complete, he could confirm that .", + "length": 71 + }, + { + "text": "with two hands at all times and to take a perpendicular stance to the .", + "length": 71 + }, + { + "text": "Sam Scarmardo, left, the owner of an Arizona shooting range, has come .", + "length": 71 + }, + { + "text": "the horrific accident unfolded in a split second when the Israeli made .", + "length": 72 + }, + { + "text": "Vaca, 39, a father and retired army general, was teaching a young girl .", + "length": 72 + }, + { + "text": "let off a volley, but something goes wrong when she fires a second time.", + "length": 72 + }, + { + "text": "shows Vacca standing closely next to the girl when the gun recoiled as .", + "length": 72 + }, + { + "text": "how to fire an Uzi at Bullets and Burgers range in White Hills, Arizona .", + "length": 73 + }, + { + "text": "Matthews of the John Clayton Show on 710 ESPN in Seattle said, 'A story .", + "length": 73 + }, + { + "text": "veteran from Lake Havasu City, Arizona, teaches the 9-year-old girl how .", + "length": 73 + }, + { + "text": "the clip unfolds, Vacca is seen to instruct the girl to hold the weapon .", + "length": 73 + }, + { + "text": "told the Las Vegas Review Journal that no charges will be filed because .", + "length": 73 + }, + { + "text": "involving a nine-year-old shooting an UZI at a place called Burgers and .", + "length": 73 + }, + { + "text": "39-year-old Army veteran Charles Vacca, died Monday after getting hit in .", + "length": 74 + }, + { + "text": "accidentally shot dead her instructor has come out to defend letting the .", + "length": 74 + }, + { + "text": "Mohave County Sheriff Jim 
McCabe said the full video of the incident was 'ghastly'.", + "length": 83 + }, + { + "text": "He says that's not unusual for other shooting ranges, and the girl seemed responsible.", + "length": 86 + }, + { + "text": "Bateman simply asked, 'why you'd even let a nine year old handle a gun in the first place?!", + "length": 91 + }, + { + "text": "'The guy just dropped,' McCabe said of Vacca, who suffered more than one gunshot to the head.", + "length": 93 + }, + { + "text": "airlifted to hospital on Monday morning after the little girl shot him but died that evening.", + "length": 93 + }, + { + "text": "' Cameron Atfield directed his anger at the parents stating he 'hoped the parents are charged.", + "length": 94 + }, + { + "text": "accidentally shot dead her instructor Charles Vacca, right, with a single bullet to the head .", + "length": 94 + }, + { + "text": "Scene: The tragedy unfolded at Bullets and Burgers, an activity center 25 miles south of Las Vegas .", + "length": 100 + }, + { + "text": "' The girl loses her two-handed grip on the weapon and it pulls to her left, striking Vacca in the head.", + "length": 104 + }, + { + "text": "' However, Scarmardo is thinking of now instituting a height minimum to perhaps prevent any future tragedy.", + "length": 107 + }, + { + "text": "A 22-second video was released showing the moments before the girl accidentally shot her instructor on Monday.", + "length": 110 + }, + { + "text": "' Weapon: The girl was using an Uzi (file picture) when the recoil send the gun over her head, shooting Vacca .", + "length": 111 + }, + { + "text": "'What we're kind of kicking around right now is like at Disneyland, you know if you're not \"this tall,\" you can't shoot.", + "length": 120 + }, + { + "text": "Watch Video Here (Does not show fatal shot) He said that the girl had successfully fired the weapon several times before when it was set on 'single shot' mode.", + "length": 159 + }, + { + "text": "Killed: Charles Vacca, pictured right, 
was accidentally shot in the head on Monday as he helped a nine-year-old girl handle a submachine gun at an Arizona shooting range.", + "length": 170 + }, + { + "text": "Vacca was standing next to the girl when she fired, but she and her parents only realized he was mortally wounded when one his colleagues at the shooting range rushed to help him as he lay on the floor.", + "length": 202 + }, + { + "text": "Charles Vacca, 39, was fatally struck in the head at Nevada's Bullets and Burgers when the weapon recoiled upwards out of the girl's grip in the split-second after he had told her to place it into automatic mode and fire.", + "length": 221 + }, + { + "text": "The parents of the nine-year-old girl who accidentally killed her shooting instructor with an Uzi last week didn't see he had been hit because their daughter was complaining the recoil was too much and hurt her shoulder, says the police report released today.", + "length": 259 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6874518990516663 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:31:58.895906884Z", + "first_section_created": "2025-12-23T09:31:58.896261997Z", + "last_section_published": "2025-12-23T09:31:58.896571809Z", + "all_results_received": "2025-12-23T09:31:58.981122519Z", + "output_generated": "2025-12-23T09:31:58.981367028Z", + "total_processing_time_ms": 85, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 84, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:31:58.896261997Z", + "publish_time": "2025-12-23T09:31:58.896489606Z", + "first_worker_start": "2025-12-23T09:31:58.897180932Z", + "last_worker_end": "2025-12-23T09:31:58.980248Z", + "total_journey_time_ms": 83, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:31:58.897299737Z", + "start_time": "2025-12-23T09:31:58.89737554Z", + "end_time": 
"2025-12-23T09:31:58.897611248Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:31:58.89734Z", + "start_time": "2025-12-23T09:31:58.897485Z", + "end_time": "2025-12-23T09:31:58.980248Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 82 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:31:58.897272236Z", + "start_time": "2025-12-23T09:31:58.897367239Z", + "end_time": "2025-12-23T09:31:58.897570647Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:31:58.897107429Z", + "start_time": "2025-12-23T09:31:58.897180932Z", + "end_time": "2025-12-23T09:31:58.897261735Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:31:58.896521207Z", + "publish_time": "2025-12-23T09:31:58.896571809Z", + "first_worker_start": "2025-12-23T09:31:58.897223434Z", + "last_worker_end": "2025-12-23T09:31:58.952528Z", + "total_journey_time_ms": 56, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:31:58.897276836Z", + "start_time": "2025-12-23T09:31:58.897332238Z", + "end_time": "2025-12-23T09:31:58.897364039Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:31:58.897473Z", + "start_time": "2025-12-23T09:31:58.897599Z", + "end_time": "2025-12-23T09:31:58.952528Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 54 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:31:58.897178432Z", + "start_time": "2025-12-23T09:31:58.897225734Z", + "end_time": "2025-12-23T09:31:58.897241534Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:31:58.897193533Z", + 
"start_time": "2025-12-23T09:31:58.897223434Z", + "end_time": "2025-12-23T09:31:58.897232534Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 136, + "min_processing_ms": 54, + "max_processing_ms": 82, + "avg_processing_ms": 68, + "median_processing_ms": 82, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2648, + "slowest_section_id": 0, + "slowest_section_time_ms": 83 + } +} diff --git a/data/output/0001dc22494415d03319a6833a00cd9c559f1395.json b/data/output/0001dc22494415d03319a6833a00cd9c559f1395.json new file mode 100644 index 0000000..7b65914 --- /dev/null +++ b/data/output/0001dc22494415d03319a6833a00cd9c559f1395.json @@ -0,0 +1,336 @@ +{ + "file_name": "0001dc22494415d03319a6833a00cd9c559f1395.txt", + "total_words": 855, + "top_n_words": [ + { + "word": "the", + "count": 56 + }, + { + "word": "of", + "count": 42 + }, + { + "word": "populations", + "count": 20 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "that", + "count": 15 + }, + { + 
"word": "africa", + "count": 13 + }, + { + "word": "tools", + "count": 13 + }, + { + "word": "early", + "count": 11 + }, + { + "word": "to", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Sarah Griffiths for MailOnline .", + "length": 32 + }, + { + "text": "A collection of prehistoric tools are pictured .", + "length": 48 + }, + { + "text": "An artist's impression of early humans is pictured .", + "length": 52 + }, + { + "text": "In Africa, owing to the hot climate, ancient DNA has not yet been found.", + "length": 72 + }, + { + "text": "‘Stone tools are the only form of preserved material culture for most of human history.", + "length": 89 + }, + { + "text": "Early modern humans had already developed distinct cultural traditions before they left North Africa.", + "length": 101 + }, + { + "text": "It also suggests that early populations took advantage of rivers and lakes that criss-crossed the Saharan desert.", + "length": 113 + }, + { + "text": "It also suggests that early populations took advantage of rivers and lakes that criss-crossed the Saharan desert.", + "length": 113 + }, + { + "text": "‘These stone tools reveal how early populations of modern humans dispersed across the Sahara just before they left North Africa.", + "length": 130 + }, + { + "text": "A crucial next step involves fieldwork in areas such as the Arabian Peninsula to understand how these populations spread into Eurasia.", + "length": 134 + }, + { + "text": "Researchers carried out the largest ever comparative study of stone tools between 130,000 and 75,000 years old to shed new light on our ancestors.", + "length": 147 + }, + { + "text": "The results show, for the first time, how early populations of modern humans dispersed across the Sahara before forming small populations by the rivers.", + "length": 152 + }, + { + "text": "The tools, discovered in the region between sub-Saharan Africa and Eurasia,  were made in different ways, 
reflecting a diversity of cultural traditions.", + "length": 153 + }, + { + "text": "Researchers carried out the largest ever comparative study of stone tools dating between 130,000 and 75,000 years old to shed new light on our ancestors.", + "length": 153 + }, + { + "text": "A new study also identified at least four distinct populations, each relatively isolated from each other with their own different cultural characteristics.", + "length": 155 + }, + { + "text": "The new study also identified at least four distinct populations, each relatively isolated from each other with their own different cultural characteristics.", + "length": 157 + }, + { + "text": "The study supports other recent theories that modern humans may have first successfully left Africa earlier than 60,000-50,000 years ago, Dr Eleanor Scerri said.", + "length": 161 + }, + { + "text": "’ The researchers used a variety of tests in order to rule that tools from different areas were culturally distinct simply because of differences in raw materials.", + "length": 165 + }, + { + "text": "Dr Scerri said: ‘Not much is known about the structure of early modern human populations in Africa, particularly at the time of their earliest dispersals into Eurasia.", + "length": 169 + }, + { + "text": "Researchers from the University of Oxford, Kings College London and the University of Bordeaux took over 300,000 measurements of stone tools from 17 archaeological sites across North Africa.", + "length": 190 + }, + { + "text": "The study looking at different types of tools, has for the first time shown how early populations of modern humans dispersed across the Sahara before forming small populations by the rivers .", + "length": 191 + }, + { + "text": "A climate model coupled with data about these ancient rivers was matched with the new findings on stone tools to reveal that populations connected by rivers had similarities in their cultures.", + "length": 192 + }, + { + "text": "history supports other theories 
recently put forward that modern humans may have first successfully left Africa earlier than 60,000-50,000 years ago, which had been the common view among scholars.", + "length": 196 + }, + { + "text": "The tools, discovered in the region between sub-Saharan Africa and Eurasia - including the desert (marked on the map) - were made in different ways, reflecting a diversity of cultural traditions .", + "length": 197 + }, + { + "text": "They combined the data about the tools with the environmental model for the first time, which showed that the Sahara was then a patchwork of savannah, grasslands and water, interspersed with desert.", + "length": 198 + }, + { + "text": "‘Our picture of modern human demography around 100,000 years ago is that there were a number of populations, varying in size and degree of genetic contact, distributed over a wide geographical area.", + "length": 200 + }, + { + "text": "While different populations were relatively isolated, we were interested to find that when connected by rivers, they share similarities in their tool-making suggesting some interaction with one another.", + "length": 202 + }, + { + "text": "‘Our work provides important new evidence that sheds light on both the timing of early modern human dispersals out of Africa and the character of our interaction with other human species, such as Neanderthals.", + "length": 211 + }, + { + "text": "The team also mapped out known ancient rivers and major lakes so they could piece together where populations made and used their tools, according to the study, which is published in the journal Quaternary Science Reviews.", + "length": 221 + }, + { + "text": "’ Ongoing fieldwork seeking to do just that is making ‘remarkable discoveries’ in the deserts of Arabia, which may also have been the region where both Neanderthal and Homo sapiens populations might have interacted, he explained.", + "length": 235 + }, + { + "text": "This could be the earliest evidence of different populations ‘budding’ 
across the Sahara, using the rivers to disperse and meet people from other populations, according to the paper published in the journal, Quaternary Science Reviews.", + "length": 239 + }, + { + "text": "‘This is the first time that scientists have identified that early modern humans at the cusp of dispersal out of Africa were grouped in separate, isolated and local populations,’ Dr Eleanor Scerri, visiting scholar at the University of Oxford and lead author of the study said.", + "length": 281 + }, + { + "text": "Dr Huw Groucutt, from the School of Archaeology at the University of Oxford, said: ‘The question of whether there was an early successful exit from Africa has become one of whether any of the populations discovered in this paper went in and out of Africa for some or all of this time.", + "length": 286 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5140308439731598 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:31:59.39729462Z", + "first_section_created": "2025-12-23T09:31:59.397638333Z", + "last_section_published": "2025-12-23T09:31:59.397926844Z", + "all_results_received": "2025-12-23T09:31:59.475494789Z", + "output_generated": "2025-12-23T09:31:59.475721198Z", + "total_processing_time_ms": 78, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 77, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:31:59.397638333Z", + "publish_time": "2025-12-23T09:31:59.397869842Z", + "first_worker_start": "2025-12-23T09:31:59.398577269Z", + "last_worker_end": "2025-12-23T09:31:59.474378Z", + "total_journey_time_ms": 76, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:31:59.398561168Z", + "start_time": "2025-12-23T09:31:59.398650672Z", + "end_time": "2025-12-23T09:31:59.398799277Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": 
"sentiment", + "receive_time": "2025-12-23T09:31:59.39883Z", + "start_time": "2025-12-23T09:31:59.398957Z", + "end_time": "2025-12-23T09:31:59.474378Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 75 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:31:59.398536967Z", + "start_time": "2025-12-23T09:31:59.398638771Z", + "end_time": "2025-12-23T09:31:59.39887978Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:31:59.398492966Z", + "start_time": "2025-12-23T09:31:59.398577269Z", + "end_time": "2025-12-23T09:31:59.398648271Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:31:59.397890443Z", + "publish_time": "2025-12-23T09:31:59.397926844Z", + "first_worker_start": "2025-12-23T09:31:59.398587769Z", + "last_worker_end": "2025-12-23T09:31:59.461967Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:31:59.398561168Z", + "start_time": "2025-12-23T09:31:59.398625471Z", + "end_time": "2025-12-23T09:31:59.398638971Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:31:59.39883Z", + "start_time": "2025-12-23T09:31:59.398956Z", + "end_time": "2025-12-23T09:31:59.461967Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:31:59.398644571Z", + "start_time": "2025-12-23T09:31:59.398683273Z", + "end_time": "2025-12-23T09:31:59.398716474Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:31:59.398553768Z", + "start_time": "2025-12-23T09:31:59.398587769Z", + "end_time": "2025-12-23T09:31:59.398596069Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 138, + "min_processing_ms": 63, + "max_processing_ms": 75, + "avg_processing_ms": 69, + "median_processing_ms": 75, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2702, + "slowest_section_id": 0, + "slowest_section_time_ms": 76 + } +} diff --git a/data/output/0001f1fcec4ca8bc7e278607ba0e31e5cc046e66.json b/data/output/0001f1fcec4ca8bc7e278607ba0e31e5cc046e66.json new file mode 100644 index 0000000..f44dc95 --- /dev/null +++ b/data/output/0001f1fcec4ca8bc7e278607ba0e31e5cc046e66.json @@ -0,0 +1,254 @@ +{ + "file_name": "0001f1fcec4ca8bc7e278607ba0e31e5cc046e66.txt", + "total_words": 579, + "top_n_words": [ + { + "word": "the", + "count": 33 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "and", + "count": 16 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "for", + "count": 10 + }, + { + "word": "test", + "count": 10 + }, + { + "word": "against", + "count": 9 + }, + { + "word": 
"australia", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "I thank them for convincing me to play.", + "length": 39 + }, + { + "text": "\"But my friends, supporters and the media...", + "length": 44 + }, + { + "text": "\"I think such a performance was just round the corner.", + "length": 54 + }, + { + "text": "And we are playing against a top side like Australia,\" Younus said after Pakistan's 221-run win.", + "length": 96 + }, + { + "text": "\"Even though I was hit a couple of times on the body I was expecting this and managed to stay firm.", + "length": 99 + }, + { + "text": "Younus Khan (above) fired two centuries against Australia in Dubai but nearly decided not to play .", + "length": 99 + }, + { + "text": "\"I am just happy that my performance helped the team win because we needed this win for a while now.", + "length": 100 + }, + { + "text": "\"Yes I thought about skipping the test series because I was not in proper frame of mind and struggling.", + "length": 103 + }, + { + "text": "Batsman Khan, 37, led Pakistan to a 221-run victory over Australia in the first of two test matches in Dubai .", + "length": 110 + }, + { + "text": "Khan (right) said he was not in the right frame of mind after feeling aggrieved at not being selected for ODI side .", + "length": 116 + }, + { + "text": "\" Pakistan captain Misbah-ul-Haq (right) made an encouraging 69 in the first innings as he looks to return to form .", + "length": 116 + }, + { + "text": "\" Sunday's win was Pakistan's first over Australia in tests since 2010 and they have not won a test series against Australia since 1994.", + "length": 136 + }, + { + "text": "Pakistan captain Misbah-ul-Haq was also under pressure because of his poor form but came good in the test with a 69 in the first innings.", + "length": 137 + }, + { + "text": "\"Grant Flower spent a lot of time with me in the nets and mentally he prepared me for a bruising time against a top class Australian attack.", + "length": 140 + }, + 
{ + "text": "Host Ramiz Raja, a former Pakistan captain, urged him to give credit to the board for selecting him but Younus was in no mood to be generous.", + "length": 141 + }, + { + "text": "The 40-year-old had opted to sit out of the third ODI against Australia because of his lack of runs and there were calls to sack him before next year's World Cup.", + "length": 162 + }, + { + "text": "Younus, who became Pakistan's leading test century maker with 26 hundreds, last month slammed the selectors and board for not showing senior players enough respect.", + "length": 164 + }, + { + "text": "\"I give lot of credit to Grant Ludon and Grant Flower who worked hard with me before this test to get me ready for the match psychologically and mentally,\" Younus said.", + "length": 168 + }, + { + "text": "The former captain, who will turn 37 next month, had criticised the selectors and the Pakistan Cricket Board (PCB) after being dropped for the ODI series against Australia.", + "length": 172 + }, + { + "text": "On Sunday, Younus avoided mentioning the names of team manager Moin Khan, also the chief selector, and head coach Waqar Younis or the PCB at the post-match presentation ceremony.", + "length": 178 + }, + { + "text": "He had also offered to step down from the test side but the PCB avoided taking any disciplinary action against him despite confirming that Younus had violated code of conduct for players.", + "length": 187 + }, + { + "text": "And on Sunday he refused to give credit to the board or the selectors and only thanked Zimbabwean Grant Flower, the team's batting coach, and trainer Grant Ludan for his stellar performance.", + "length": 190 + }, + { + "text": "Now we have a very good chance of becoming the first Pakistani side to win a test series against Australia in a long time and we will not let up in the second test in Abu Dhabi,\" said Misbah.", + "length": 191 + }, + { + "text": "Senior Pakistan batsman Younus Khan, who capped a memorable test against 
Australia in Dubai with hundreds in both innings, said he had thought of skipping the two-match series after being dropped from the 50-over squad.", + "length": 219 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4686964452266693 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:31:59.898662656Z", + "first_section_created": "2025-12-23T09:31:59.898983468Z", + "last_section_published": "2025-12-23T09:31:59.899133674Z", + "all_results_received": "2025-12-23T09:31:59.960108089Z", + "output_generated": "2025-12-23T09:31:59.960279095Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:31:59.898983468Z", + "publish_time": "2025-12-23T09:31:59.899133674Z", + "first_worker_start": "2025-12-23T09:31:59.899674994Z", + "last_worker_end": "2025-12-23T09:31:59.959209Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:31:59.899677494Z", + "start_time": "2025-12-23T09:31:59.899756797Z", + "end_time": "2025-12-23T09:31:59.899816599Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:31:59.899954Z", + "start_time": "2025-12-23T09:31:59.900101Z", + "end_time": "2025-12-23T09:31:59.959209Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:31:59.899649993Z", + "start_time": "2025-12-23T09:31:59.899737196Z", + "end_time": "2025-12-23T09:31:59.899913803Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:31:59.89957129Z", + "start_time": "2025-12-23T09:31:59.899674994Z", + "end_time": "2025-12-23T09:31:59.899750297Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3182, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/0002067d13d3ca304e0bc98d04dde85d4091c55e.json b/data/output/0002067d13d3ca304e0bc98d04dde85d4091c55e.json new file mode 100644 index 0000000..d47484d --- /dev/null +++ b/data/output/0002067d13d3ca304e0bc98d04dde85d4091c55e.json @@ -0,0 +1,282 @@ +{ + "file_name": "0002067d13d3ca304e0bc98d04dde85d4091c55e.txt", + "total_words": 534, + "top_n_words": [ + { + "word": "the", + "count": 19 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "and", + "count": 16 + }, + { + "word": "he", + "count": 16 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "his", + "count": 8 + }, + { + "word": "on", + "count": 8 + }, + { 
+ "word": "was", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "'Today is that day.", + "length": 19 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "12:15 EST, 22 February 2014 .", + "length": 29 + }, + { + "text": "12:20 EST, 22 February 2014 .", + "length": 29 + }, + { + "text": "'I am not the person I was in 1996.", + "length": 35 + }, + { + "text": "He has links to Rochdale and Tameside.", + "length": 38 + }, + { + "text": "But it appears that he couldn't wait for his time to come.", + "length": 58 + }, + { + "text": "A convicted killer is on the run after he bolted while on day release from jail.", + "length": 80 + }, + { + "text": "That meant he would have been able to apply for parole some time this year or next.", + "length": 83 + }, + { + "text": "'She has done much to me help and realise my full potential as a human being,' he said.", + "length": 87 + }, + { + "text": "Paul Maxwell, 49, disappeared while visiting Rochdale town centre on Friday with prison staff.", + "length": 94 + }, + { + "text": "He wears glasses and was last seen wearing blue jeans, a grey fleece top and white training shoes.", + "length": 98 + }, + { + "text": "Maxwell is described as white, about 5ft 7in tall, of medium build with short brown hair and blue eyes.", + "length": 103 + }, + { + "text": "Victim: Joe Smales was beaten and later died after being attacked and robbed at his home in Wakefield 1996 .", + "length": 108 + }, + { + "text": "'At some point I had to stand up, take responsibility and be man enough to take responsibility for my actions.", + "length": 110 + }, + { + "text": "' The attack on the reclusive elderly brothers sent shockwaves through the tight-knit community of Stanley in Wakefield.", + "length": 120 + }, + { + "text": "'I would ask 
anyone with information about where he currently is, has been or could be heading to contact us in confidence.", + "length": 123 + }, + { + "text": "He was sentenced to life imprisonment for the 1996 murder and robbery of 85-year-old Joe Smales in Wakefield, West Yorkshire.", + "length": 125 + }, + { + "text": "Tragically, the brothers failed to report the incident and were targeted again four months later, when Joe was fatally attacked.", + "length": 128 + }, + { + "text": "Following the retrial Mr Justice Butterfield gave Maxwell a life sentence and ordered that he must serve a minimum of 17 and a half years.", + "length": 138 + }, + { + "text": "Killer: A photo issued by Greater Manchester Police of Paul Maxwell, 49, who absconded while on on day release with prison staff in Rochdale town centre .", + "length": 154 + }, + { + "text": "In a handwritten note he read out on the first day of the hearing at Leeds Crown Court, he confessed to the murder and apologised to the family of Mr Smales for his 'cowardice'.", + "length": 177 + }, + { + "text": "Mr Smales and his 68-year-old brother Bert, who was also badly injured in the attack, had lived all their lives at their cottage, where they topped up their pensions by selling eggs.", + "length": 182 + }, + { + "text": "' First jailed for the killing of Mr Smales alongside his brother in 1998, Maxwell made headlines in 2011 when he unexpectedly pleaded guilty at a retrial after his conviction was overturned on appeal.", + "length": 201 + }, + { + "text": "Inspector Niall Pawson of Greater Manchester Police said: 'While we do not believe Paul Maxwell presents a significant threat to the public, he is unlawfully at large and he needs to be returned to prison.", + "length": 205 + }, + { + "text": "In another shock revelation, in his statement, written in his cell, he claimed to fallen in love with the daughter of a priest with whom he had developed a relationship during counselling sessions in prison.", + "length": 207 + 
}, + { + "text": "At Maxwell's original trial in 1998, a jury heard the Smales brothers' quiet existence was shattered in June 1996 when their attackers barged through the door, threatening Bert with scissors and getting away with £7,000.", + "length": 223 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7043441534042358 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:00.399876285Z", + "first_section_created": "2025-12-23T09:32:00.400226099Z", + "last_section_published": "2025-12-23T09:32:00.400444307Z", + "all_results_received": "2025-12-23T09:32:00.469489828Z", + "output_generated": "2025-12-23T09:32:00.469649734Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:00.400226099Z", + "publish_time": "2025-12-23T09:32:00.400444307Z", + "first_worker_start": "2025-12-23T09:32:00.400971127Z", + "last_worker_end": "2025-12-23T09:32:00.468664Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:00.400984827Z", + "start_time": "2025-12-23T09:32:00.40106273Z", + "end_time": "2025-12-23T09:32:00.401132933Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:00.401083Z", + "start_time": "2025-12-23T09:32:00.401242Z", + "end_time": "2025-12-23T09:32:00.468664Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:00.400935926Z", + "start_time": "2025-12-23T09:32:00.401009328Z", + "end_time": "2025-12-23T09:32:00.401165034Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:00.400895124Z", + 
"start_time": "2025-12-23T09:32:00.400971127Z", + "end_time": "2025-12-23T09:32:00.401009828Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3032, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/0002095e55fcbd3a2f366d9bf92a95433dc305ef.json b/data/output/0002095e55fcbd3a2f366d9bf92a95433dc305ef.json new file mode 100644 index 0000000..aa363f5 --- /dev/null +++ b/data/output/0002095e55fcbd3a2f366d9bf92a95433dc305ef.json @@ -0,0 +1,254 @@ +{ + "file_name": "0002095e55fcbd3a2f366d9bf92a95433dc305ef.txt", + "total_words": 549, + "top_n_words": [ + { + "word": "the", + "count": 43 + }, + { + "word": "in", + "count": 21 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "to", + "count": 14 + }, + { + "word": "s", + "count": 13 + }, + { + "word": "a", + "count": 8 + }, + { + "word": "and", + 
"count": 8 + }, + { + "word": "bolt", + "count": 8 + }, + { + "word": "by", + "count": 6 + }, + { + "word": "gold", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "The U.", + "length": 6 + }, + { + "text": "73 seconds.", + "length": 11 + }, + { + "text": "36 seconds.", + "length": 11 + }, + { + "text": "S finished second in 37.", + "length": 24 + }, + { + "text": "Their quartet recorded a championship record of 41.", + "length": 51 + }, + { + "text": "29 seconds, well clear of France, who crossed the line in second place in 42.", + "length": 77 + }, + { + "text": "Kiprop's compatriot Eunice Jepkoech Sum was a surprise winner of the women's 800m.", + "length": 82 + }, + { + "text": "56 seconds with Canada taking the bronze after Britain were disqualified for a faulty handover.", + "length": 95 + }, + { + "text": "The relay triumph followed individual successes in the 100 and 200 meters in the Russian capital.", + "length": 97 + }, + { + "text": "The British quartet, who were initially fourth, were promoted to the bronze which eluded their men's team.", + "length": 106 + }, + { + "text": "Fraser-Pryce, like Bolt aged 26, became the first woman to achieve three golds in the 100-200 and the relay.", + "length": 108 + }, + { + "text": "Gatlin strayed out of his lane as he struggled to get full control of their baton and was never able to get on terms with Bolt.", + "length": 127 + }, + { + "text": "(CNN) -- Usain Bolt rounded off the world championships Sunday by claiming his third gold in Moscow as he anchored Jamaica to victory in the men's 4x100m relay.", + "length": 160 + }, + { + "text": "Victory was never seriously in doubt once he got the baton safely in hand from Ashmeade, while Gatlin and the United States third leg runner Rakieem Salaam had problems.", + "length": 169 + }, + { + "text": "The fastest man in the world charged clear of United States rival Justin Gatlin as the Jamaican quartet of Nesta Carter, Kemar Bailey-Cole, Nickel Ashmeade and 
Bolt won in 37.", + "length": 175 + }, + { + "text": "\"I'm proud of myself and I'll continue to work to dominate for as long as possible,\" Bolt said, having previously expressed his intention to carry on until the 2016 Rio Olympics.", + "length": 178 + }, + { + "text": "Earlier, Jamaica's women underlined their dominance in the sprint events by winning the 4x100m relay gold, anchored by Shelly-Ann Fraser-Pryce, who like Bolt was completing a triple.", + "length": 182 + }, + { + "text": "She later attempted to clarify her comments, but there were renewed calls by gay rights groups for a boycott of the 2014 Winter Games in Sochi, the next major sports event in Russia.", + "length": 182 + }, + { + "text": "Germany's Christina Obergfoll finally took gold at global level in the women's javelin after five previous silvers, while Kenya's Asbel Kiprop easily won a tactical men's 1500m final.", + "length": 183 + }, + { + "text": "In other final action on the last day of the championships, France's Teddy Tamgho became the third man to leap over 18m in the triple jump, exceeding the mark by four centimeters to take gold.", + "length": 192 + }, + { + "text": "The 26-year-old Bolt has now collected eight gold medals at world championships, equaling the record held by American trio Carl Lewis, Michael Johnson and Allyson Felix, not to mention the small matter of six Olympic titles.", + "length": 224 + }, + { + "text": "Bolt's final dash for golden glory brought the eight-day championship to a rousing finale, but while the hosts topped the medal table from the United States there was criticism of the poor attendances in the Luzhniki Stadium.", + "length": 225 + }, + { + "text": "There was further concern when their pole vault gold medalist Yelena Isinbayeva made controversial remarks in support of Russia's new laws, which make \"the propagandizing of non-traditional sexual relations among minors\" a criminal offense.", + "length": 240 + }, + { + "text": "Defending champions, 
the United States, were initially back in the bronze medal position after losing time on the second handover between Alexandria Anderson and English Gardner, but promoted to silver when France were subsequently disqualified for an illegal handover.", + "length": 269 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5125720500946045 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:00.900642198Z", + "first_section_created": "2025-12-23T09:32:00.900988311Z", + "last_section_published": "2025-12-23T09:32:00.901189719Z", + "all_results_received": "2025-12-23T09:32:00.97023964Z", + "output_generated": "2025-12-23T09:32:00.970426848Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:00.900988311Z", + "publish_time": "2025-12-23T09:32:00.901189719Z", + "first_worker_start": "2025-12-23T09:32:00.901783441Z", + "last_worker_end": "2025-12-23T09:32:00.969358Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:00.901775441Z", + "start_time": "2025-12-23T09:32:00.901849644Z", + "end_time": "2025-12-23T09:32:00.901924247Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:00.901965Z", + "start_time": "2025-12-23T09:32:00.90211Z", + "end_time": "2025-12-23T09:32:00.969358Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:00.901716539Z", + "start_time": "2025-12-23T09:32:00.901786341Z", + "end_time": "2025-12-23T09:32:00.901914746Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:00.901722439Z", + 
"start_time": "2025-12-23T09:32:00.901783441Z", + "end_time": "2025-12-23T09:32:00.901856044Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3192, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/00022dbfa44ccdb94c1dc06938047e258076cf75.json b/data/output/00022dbfa44ccdb94c1dc06938047e258076cf75.json new file mode 100644 index 0000000..23d755c --- /dev/null +++ b/data/output/00022dbfa44ccdb94c1dc06938047e258076cf75.json @@ -0,0 +1,386 @@ +{ + "file_name": "00022dbfa44ccdb94c1dc06938047e258076cf75.txt", + "total_words": 661, + "top_n_words": [ + { + "word": "a", + "count": 27 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "the", + "count": 19 + }, + { + "word": "of", + "count": 17 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "he", + "count": 13 + }, + { + "word": "his", + 
"count": 11 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "was", + "count": 10 + }, + { + "word": "bombs", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "A .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "addicted.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "terrorist.", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Afghanistan.", + "length": 12 + }, + { + "text": "He is not a .", + "length": 13 + }, + { + "text": "Chris Brooke .", + "length": 14 + }, + { + "text": "bullets and gunpowder.", + "length": 22 + }, + { + "text": "The court heard he had .", + "length": 24 + }, + { + "text": "Hope it goes with a bang.", + "length": 25 + }, + { + "text": "storage jars on the shelves.", + "length": 28 + }, + { + "text": "11:19 EST, 3 September 2012 .", + "length": 29 + }, + { + "text": "21:22 EST, 3 September 2012 .", + "length": 29 + }, + { + "text": "Everyone knows I make small bombs.", + "length": 34 + }, + { + "text": "Prosecutor Richard Woolfall said .", + "length": 34 + }, + { + "text": "He was given a 15-month supervision .", + "length": 37 + }, + { + "text": "' Smith, of Hull, pleaded guilty to .", + "length": 37 + }, + { + "text": "He added: 'He has been in custody now .", + "length": 39 + }, + { + "text": "In his workshop police found two live .", + "length": 39 + }, + { + "text": "It carried the message: ‘Paul 40 today.", + "length": 41 + }, + { + "text": "He added: 'There was a mixture of sugar and .", + "length": 45 + }, + { + "text": "Some people collect stamps, I make small bombs.", + "length": 47 + }, + { + "text": "another charge of possession of a class B drug.", + "length": 47 + }, + { + "text": "He does have an unhealthy interest in explosive devices.", + "length": 56 + }, + { + "text": "for five months and does not find it at all pleasurable.", + "length": 56 + }, + { + "text": "He 
told police: 'I don’t know what all the fuss is about.", + "length": 59 + }, + { + "text": "He does not have any radical religious or political opinions.", + "length": 61 + }, + { + "text": "' An Army bomb disposal team was called in to make the home safe.", + "length": 65 + }, + { + "text": "It included material of the type used by the Taliban in Afghanistan.", + "length": 68 + }, + { + "text": "chemicals including sulphur, iron oxide and magnesium were held in .", + "length": 68 + }, + { + "text": "making or possessing explosives under suspicious circumstances and .", + "length": 68 + }, + { + "text": "aluminium of the type used in Northern Ireland and by the Taliban in .", + "length": 70 + }, + { + "text": "pipe bombs and 20 to 30 detonators as well as shotgun cartridges, 9mm .", + "length": 71 + }, + { + "text": "order, with a drug rehabilitation programme and 150 hours' unpaid work.", + "length": 71 + }, + { + "text": "lost his job and had taken to injecting amphetamines, to which he was .", + "length": 71 + }, + { + "text": "David Gordon, defending, said Smith was 'a somewhat naive 40-year-old'.", + "length": 71 + }, + { + "text": "possession of flares or fireworks which are not commercially available.", + "length": 71 + }, + { + "text": "One explosion in Smith’s back garden set off all the local car alarms.", + "length": 72 + }, + { + "text": "ten-year anti-social behaviour order was also imposed, banning him from .", + "length": 73 + }, + { + "text": "Smith told officers he was not a terrorist or fanatic, but made the devices as a hobby for fun.", + "length": 95 + }, + { + "text": "’ However, neighbours got so fed up with him setting off explosions that they complained to police.", + "length": 101 + }, + { + "text": "He also collected gunpowder from unexploded bombs found in eroding cliffs on the East Yorkshire coast.", + "length": 102 + }, + { + "text": "Officers raided the home and found a huge collection of bomb-making equipment, Hull Crown Court was 
told.", + "length": 105 + }, + { + "text": "Some parts were made from household objects, such as a lunchbox which was turned into a remote-controlled detonator.", + "length": 116 + }, + { + "text": "An electrician gave a whole new meaning to DIY by building and setting off homemade bombs as a hobby, a court heard yesterday.", + "length": 126 + }, + { + "text": "' He learned his skills from the internet and spent the past ten years making increasingly sophisticated bombs, the court heard.", + "length": 128 + }, + { + "text": "Handed 15-month supervision order and 10-year ASBO banning him from possession of flares or fireworks not commercially available .", + "length": 130 + }, + { + "text": "'Some people collect stamps I make small bombs': Hull Crown Court heard Smith had lost his job as an electrician and had taken to injecting amphetamine .", + "length": 153 + }, + { + "text": "Recognition: Unemployed drug user Smith set off so many of his homemade devices that this year his family bought him a bomb-shaped cake for his 40th birthday .", + "length": 159 + }, + { + "text": "Dangerous: Chemicals including sulphur, iron oxide and magnesium were held in storage jars on the shelves and a pestle and mortar had been used to mix gunpowder .", + "length": 162 + }, + { + "text": "Paul Smith was so obsessed with constructing explosive devices from a workshop at his parents’ house that his family gave him a birthday cake in the shape of a bomb.", + "length": 167 + }, + { + "text": "Serious: Police uncovered two live pipe bombs, 20-30 detonators, two battery packs and a sophisticated halogen light operating trigger at Smith's bomb-making workshop .", + "length": 168 + }, + { + "text": "Collection: Police also found a series of home-made detonators and a stock-pile of scaffolding tubing, 10 shotgun cartridges, a hoard of 9mm bullets and a store of gun-powder in the raid .", + "length": 188 + }, + { + "text": "Hobby: Paul Smith's 10-year habit of making DIY bombs in a workshop at 
his parent's house in East Hull came to an end in April when neighbours got sick of his explosions setting off their car alarms .", + "length": 200 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5566460490226746 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:01.402323145Z", + "first_section_created": "2025-12-23T09:32:01.402675559Z", + "last_section_published": "2025-12-23T09:32:01.402876366Z", + "all_results_received": "2025-12-23T09:32:01.471269563Z", + "output_generated": "2025-12-23T09:32:01.47144397Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:01.402675559Z", + "publish_time": "2025-12-23T09:32:01.402876366Z", + "first_worker_start": "2025-12-23T09:32:01.403311083Z", + "last_worker_end": "2025-12-23T09:32:01.47035Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:01.403297982Z", + "start_time": "2025-12-23T09:32:01.403368185Z", + "end_time": "2025-12-23T09:32:01.403457588Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:01.403584Z", + "start_time": "2025-12-23T09:32:01.403734Z", + "end_time": "2025-12-23T09:32:01.47035Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:01.403323583Z", + "start_time": "2025-12-23T09:32:01.403424787Z", + "end_time": "2025-12-23T09:32:01.403618995Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:01.40324418Z", + "start_time": "2025-12-23T09:32:01.403311083Z", + "end_time": "2025-12-23T09:32:01.403406887Z", + "queue_wait_time_ms": 
0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3870, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/00027e965c8264c35cc1bc55556db388da82b07f.json b/data/output/00027e965c8264c35cc1bc55556db388da82b07f.json new file mode 100644 index 0000000..cb18234 --- /dev/null +++ b/data/output/00027e965c8264c35cc1bc55556db388da82b07f.json @@ -0,0 +1,392 @@ +{ + "file_name": "00027e965c8264c35cc1bc55556db388da82b07f.txt", + "total_words": 1047, + "top_n_words": [ + { + "word": "the", + "count": 73 + }, + { + "word": "to", + "count": 42 + }, + { + "word": "gsa", + "count": 26 + }, + { + "word": "a", + "count": 25 + }, + { + "word": "in", + "count": 24 + }, + { + "word": "of", + "count": 23 + }, + { + "word": "and", + "count": 21 + }, + { + "word": "that", + "count": 17 + }, + { + "word": "s", + "count": 16 + }, + { + "word": "for", + 
"count": 12 + } + ], + "sorted_sentences": [ + { + "text": "C.", + "length": 2 + }, + { + "text": "CNN's Sara Anwar, Elizabeth M.", + "length": 30 + }, + { + "text": "Four of those trips were to St.", + "length": 31 + }, + { + "text": ", where a lot of business is done.", + "length": 34 + }, + { + "text": "Watch Erin Burnett weekdays 7pm ET.", + "length": 35 + }, + { + "text": "Darrell Issa, R-California, to the GSA.", + "length": 39 + }, + { + "text": "The total cost to taxpayers was $24,221.", + "length": 40 + }, + { + "text": "\"It doesn't make sense,\" the employee said.", + "length": 43 + }, + { + "text": "For the latest from Erin Burnett click here.", + "length": 44 + }, + { + "text": "Nunez and Tom Cohen contributed to this report.", + "length": 47 + }, + { + "text": "official, took over as acting GSA administrator.", + "length": 48 + }, + { + "text": "\" But a GSA spokeswoman said, \"We are not going to defend this type of travel.", + "length": 78 + }, + { + "text": "Two of Johnson's deputies were fired, and eight other employees left the agency.", + "length": 80 + }, + { + "text": "Jason Klumb, the GSA's regional administrator for Kansas City, defended the hire.", + "length": 81 + }, + { + "text": "The General Services Administration, which has more than 12,600 employees and a $26.", + "length": 84 + }, + { + "text": "Louis; four were to Washington, with a side trip to Cincinnati; and one was to San Diego.", + "length": 89 + }, + { + "text": "The details requested by Issa about the GSA program have not been provided to the committee.", + "length": 92 + }, + { + "text": "The controversy became politically toxic after reports and video clips of the lavish conference were released.", + "length": 110 + }, + { + "text": "The House Committee on Oversight and Government Reform requested details about the GSA's teleworking program in June.", + "length": 117 + }, + { + "text": "\" Klumb called the GSA's teleworking program \"a successful program that's going to 
lead to cost savings for taxpayers.", + "length": 118 + }, + { + "text": "Jeff Neely, the GSA official who organized the conference, resigned, as did the agency's administrator, Martha Johnson.", + "length": 119 + }, + { + "text": "GSA spokeswoman Betsaida Alcantara said in a statement this year that all the agency's practices are under a top-down review.", + "length": 125 + }, + { + "text": "Under the program, employees work from home and may live in another state from the region in which they're actually assigned.", + "length": 125 + }, + { + "text": "3 billion budget, is a relatively obscure federal agency that handles government real estate and other non-military procurement.", + "length": 128 + }, + { + "text": "The revelation prompted taxpayer indignation, embarrassed the administration and put a spotlight on wasteful spending by the GSA.", + "length": 129 + }, + { + "text": "\"When you consider everything you need when you hire someone, it would have been better to look for someone in the Kansas City area.", + "length": 132 + }, + { + "text": "It cost more than $24,000 for the business development specialist to travel to and from the mainland United States over the past year.", + "length": 134 + }, + { + "text": "In the past year, according to GSA travel records, the employee has flown back to the mainland nine times for conferences and meetings.", + "length": 135 + }, + { + "text": "\"And when factoring all of those in, it was determined that he was the best candidate, even in light of the cost that would be incurred.", + "length": 136 + }, + { + "text": "\"The cost of that travel was included in the consideration of his candidacy as an employee as compared with the other applicants,\" Klumb said.", + "length": 142 + }, + { + "text": "\" Dan Tangherlini, who was appointed acting GSA administrator this year, said the agency was examining the cost of the entire teleworking program.", + "length": 146 + }, + { + "text": "\" And a GSA employee in Kansas City, 
who requested anonymity, said that hiring someone in Hawaii to work for the Kansas City region was ludicrous.", + "length": 146 + }, + { + "text": "The Kansas City employee, who started his job in January 2011, is paid $84,440 and works from his home in Honolulu, a GSA representative confirmed.", + "length": 147 + }, + { + "text": "It would have reduced the cost of travel by at least 70 percent when you look at just the airfare of what it takes to from Honolulu to Washington, D.", + "length": 149 + }, + { + "text": "In July, a CNN investigation revealed that the GSA's Kansas City office spent more than $20,000 to send employees to cooking classes to build team spirit.", + "length": 154 + }, + { + "text": "You've got to explain to me why that's a cost-effective move for the American people, and that's a new standard that we're asking everyone at GSA to adhere to.", + "length": 159 + }, + { + "text": "That followed disclosures that 95 virtual employees, including 12 in supervisory positions, spent nearly $750,000 in travel costs between October 2010 and June 2011.", + "length": 165 + }, + { + "text": "He is among several hundred GSA \"virtual\" workers who also travel to various conferences and their home offices, costing the agency millions of dollars over the past three years.", + "length": 178 + }, + { + "text": "\"If we have someone who is working in Nebraska but reporting to Boston, there has to be a clear explanation for what value they're providing, and you've got to give me the business case.", + "length": 186 + }, + { + "text": "And 84 GSA employees, most of them supervisors or other senior staff -- all subjects of inspector general investigations -- are still collecting their bonuses, totaling more than $1 million in taxpayer money.", + "length": 208 + }, + { + "text": "CNN also requested the information more than two months ago through the federal Freedom of Information Act but has been repeatedly told by the GSA that FOIA staff members have not finished 
compiling the material.", + "length": 212 + }, + { + "text": "\"I think the most important part for the GSA to think about is make sure we open ourselves up, avail ourselves to all the smart people in the country, but then also make sure we have a clear business case,\" he said.", + "length": 215 + }, + { + "text": "Congress launched an investigation into the GSA after a scathing inspector general's report issued this year showed lavish spending -- $823,000 -- at the agency's Western Regions Conference in Las Vegas in October 2010.", + "length": 219 + }, + { + "text": "While the classes do not amount to a significant sum of money in the world of trillion-dollar government budgets, insiders said it was part of the free-spending culture that went on for years at the GSA's Kansas City regional headquarters.", + "length": 239 + }, + { + "text": "\"The American people have a right to know that federal bureaucrats who enjoy the benefits of virtual work are eligible and responsible stewards of the taxpayer dollars that support the program,\" according to a letter from committee Chairman Rep.", + "length": 245 + }, + { + "text": "In addition to the Las Vegas conference, the GSA apparently spent $330,000 to relocate an employee from Denver to Hawaii and probably millions more on other employees over a two-year period, according to a transcript of an interview with a GSA event planner.", + "length": 258 + }, + { + "text": "Kansas City, Missouri (CNN) -- The General Services Administration, already under investigation for lavish spending, allowed an employee to telecommute from Hawaii even though he is based at the GSA's Kansas City, Missouri, office, a CNN investigation has found.", + "length": 262 + }, + { + "text": "\" The GSA \"virtual employee\" program is different from telework programs offered by many private companies including CNN's parent company, Turner Broadcasting, in which some employees are encouraged to work from home some days of the week, partially to 
reduce traffic congestion.", + "length": 279 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5763686299324036 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:01.903751183Z", + "first_section_created": "2025-12-23T09:32:01.904143898Z", + "last_section_published": "2025-12-23T09:32:01.904753121Z", + "all_results_received": "2025-12-23T09:32:01.995686374Z", + "output_generated": "2025-12-23T09:32:01.995906682Z", + "total_processing_time_ms": 92, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 90, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:01.904143898Z", + "publish_time": "2025-12-23T09:32:01.904437309Z", + "first_worker_start": "2025-12-23T09:32:01.904876126Z", + "last_worker_end": "2025-12-23T09:32:01.994737Z", + "total_journey_time_ms": 90, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:01.904899827Z", + "start_time": "2025-12-23T09:32:01.90497763Z", + "end_time": "2025-12-23T09:32:01.905072033Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:01.905082Z", + "start_time": "2025-12-23T09:32:01.905243Z", + "end_time": "2025-12-23T09:32:01.994737Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 89 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:01.904809723Z", + "start_time": "2025-12-23T09:32:01.904902227Z", + "end_time": "2025-12-23T09:32:01.905100034Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:01.904798323Z", + "start_time": "2025-12-23T09:32:01.904876126Z", + "end_time": "2025-12-23T09:32:01.904940828Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": 
"2025-12-23T09:32:01.904498612Z", + "publish_time": "2025-12-23T09:32:01.904753121Z", + "first_worker_start": "2025-12-23T09:32:01.905023632Z", + "last_worker_end": "2025-12-23T09:32:01.980797Z", + "total_journey_time_ms": 76, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:01.904997031Z", + "start_time": "2025-12-23T09:32:01.905023632Z", + "end_time": "2025-12-23T09:32:01.905055133Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:01.905344Z", + "start_time": "2025-12-23T09:32:01.905468Z", + "end_time": "2025-12-23T09:32:01.980797Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 75 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:01.905052133Z", + "start_time": "2025-12-23T09:32:01.905104035Z", + "end_time": "2025-12-23T09:32:01.905163337Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:01.904999531Z", + "start_time": "2025-12-23T09:32:01.905030332Z", + "end_time": "2025-12-23T09:32:01.905048733Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 164, + "min_processing_ms": 75, + "max_processing_ms": 89, + "avg_processing_ms": 82, + "median_processing_ms": 89, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3097, + "slowest_section_id": 0, + "slowest_section_time_ms": 90 + } +} diff --git a/data/output/0002b1d590aa4d2f6f000a03947317b6dc4e5c18.json b/data/output/0002b1d590aa4d2f6f000a03947317b6dc4e5c18.json new file mode 100644 index 0000000..8a720a8 --- /dev/null +++ b/data/output/0002b1d590aa4d2f6f000a03947317b6dc4e5c18.json @@ -0,0 +1,422 @@ +{ + "file_name": "0002b1d590aa4d2f6f000a03947317b6dc4e5c18.txt", + "total_words": 632, + "top_n_words": [ + { + "word": "the", + "count": 40 + }, + { + "word": "of", + "count": 17 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "to", + "count": 14 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "litvinenko", + "count": 13 + }, + { + "word": "death", + "count": 8 + }, + { + "word": "his", + "count": 8 + }, + { + "word": "mr", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "death.", + "length": 6 + }, + { + "text": "death.", + "length": 6 + }, + { + "text": "claims.", + "length": 7 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Any truth .", + "length": 11 + }, + { + "text": "Both deny .", + "length": 11 + }, + { + "text": "involvement.", + "length": 12 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "weeks later.", + "length": 12 + }, + { + "text": "the Government.", + "length": 15 + }, + { + "text": "never happened before.", + "length": 22 + }, + { + "text": "He died in hospital three .", + "length": 27 + }, + { + "text": "Ryan Kisiel and Mark 
Duell .", + "length": 28 + }, + { + "text": "on the streets of London’.", + "length": 28 + }, + { + "text": "Scotland Yard and the Crown .", + "length": 29 + }, + { + "text": "13:20 EST, 20 September 2012 .", + "length": 30 + }, + { + "text": "18:09 EST, 20 September 2012 .", + "length": 30 + }, + { + "text": "While lying stricken in a London .", + "length": 34 + }, + { + "text": "Ben Emmerson QC, representing Mr .", + "length": 34 + }, + { + "text": "A pre-inquest hearing yesterday was .", + "length": 37 + }, + { + "text": "The Daily Mail has led criticism of .", + "length": 37 + }, + { + "text": "Mr Litvinenko, who had been granted .", + "length": 37 + }, + { + "text": "Sir Robert, a High Court judge, said .", + "length": 38 + }, + { + "text": "Hugh Davies, counsel for the inquest, .", + "length": 39 + }, + { + "text": "But they will not be disclosed to the .", + "length": 39 + }, + { + "text": "‘Pending the outcome of the disclosure .", + "length": 42 + }, + { + "text": "Mr Davies said: ‘Claims have been made .", + "length": 42 + }, + { + "text": "Outside court, Mrs Litvinenko said: ‘I .", + "length": 42 + }, + { + "text": "Hotel in Grosvenor Square on November 1, 2006.", + "length": 46 + }, + { + "text": "This evidence will be redacted, or blacked out.", + "length": 47 + }, + { + "text": "suspects, but Russia has refused to extradite them.", + "length": 51 + }, + { + "text": "The full inquest is due finally to begin early next year.", + "length": 57 + }, + { + "text": "The Metropolitan Police Service made an inquiry into these .", + "length": 60 + }, + { + "text": "assassination’ carried out by agents of the Russian state.", + "length": 60 + }, + { + "text": "believe we will get justice in Britain – we need the truth.", + "length": 61 + }, + { + "text": "can be conducted behind closed doors in any civil or inquest hearing.", + "length": 69 + }, + { + "text": "he endorsed the comments of the original coroner who opened the case .", + "length": 70 + }, + { + 
"text": "is very important for all of us, my friends, my family and the public.", + "length": 70 + }, + { + "text": "redacted from the report at the request of Her Majesty’s Government.", + "length": 70 + }, + { + "text": "political asylum in the UK, was allegedly poisoned at the Millennium .", + "length": 70 + }, + { + "text": "known to you sir and counsel and solicitors to the inquest, has been .", + "length": 70 + }, + { + "text": "told yesterday’s hearing in central London that the contents of the .", + "length": 71 + }, + { + "text": "that Mr Litvinenko had contact with British intelligence prior to his .", + "length": 71 + }, + { + "text": "examine allegations the Russian authorities ordered Mr Litvinenko’s .", + "length": 71 + }, + { + "text": "Prosecution Service have both named Lugovoy and Kovtun as their prime .", + "length": 71 + }, + { + "text": "opposition to government officials, including President Vladimir Putin.", + "length": 71 + }, + { + "text": "other interested parties represented at the inquest, at the request of .", + "length": 72 + }, + { + "text": "police report are known to his team and to the coroner, Sir Robert Owen.", + "length": 72 + }, + { + "text": "exercise currently under way, the product of these inquiries, which are .", + "length": 73 + }, + { + "text": "that it was in the ‘public, national and international’ interests to .", + "length": 74 + }, + { + "text": "It was a British citizen killed here, a British soul, a killing that had .", + "length": 74 + }, + { + "text": "hospital bed, Mr Litvinenko said Moscow ordered his death because of his .", + "length": 74 + }, + { + "text": "told the police evidence might reveal whether his death was a ‘targeted .", + "length": 75 + }, + { + "text": "true it would mean Russia was behind ‘state-sponsored nuclear terrorism .", + "length": 75 + }, + { + "text": "Litvinenko’s widow Marina and son Anatoli, said that if such claims were .", + "length": 76 + }, + { + "text": "Government plans to 
allow ‘closed material procedures’, in which cases .", + "length": 76 + }, + { + "text": "Widow: Mrs Litvinenko (right) speaks to a man and woman following a hearing into the death of her husband .", + "length": 107 + }, + { + "text": "It has been claimed that both MI5 and MI6 had regular contact with the former security officer about his work in Russia.", + "length": 120 + }, + { + "text": "‘This redaction should not be taken as indicating one way or the other whether Mr Litvinenko did indeed have any such contact.", + "length": 128 + }, + { + "text": "But parts of a Scotland Yard investigation into these links will not be made public during the inquest into his death, at the Government’s request.", + "length": 149 + }, + { + "text": "Police evidence about the links between murdered Russian spy Alexander Litvinenko and British intelligence will be heard in secret, a court was told yesterday.", + "length": 159 + }, + { + "text": "’ Statement: Mrs Litvinenko speaks to members of the media as she leaves the hearing into the death of her husband, who died from polonium poisoning in London in 2006 .", + "length": 170 + }, + { + "text": "Mr Litvinenko, 43, was poisoned with radioactive polonium-210 allegedly while having tea with former KGB agents Andrei Lugovoy and Dmitry Kovturn at a central London hotel in 2006.", + "length": 180 + }, + { + "text": "'We'll get justice': Marina Litvinenko (left), widow of murdered Russian spy Alexander Litvinenko (right), speaks to members of the media as she leaves a hearing into his death in London on Thursday .", + "length": 200 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6146491169929504 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:02.405560936Z", + "first_section_created": "2025-12-23T09:32:02.40595295Z", + "last_section_published": "2025-12-23T09:32:02.406253262Z", + "all_results_received": "2025-12-23T09:32:02.473183003Z", + "output_generated": 
"2025-12-23T09:32:02.473350109Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:02.40595295Z", + "publish_time": "2025-12-23T09:32:02.406253262Z", + "first_worker_start": "2025-12-23T09:32:02.406715979Z", + "last_worker_end": "2025-12-23T09:32:02.472305Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:02.40672008Z", + "start_time": "2025-12-23T09:32:02.406793882Z", + "end_time": "2025-12-23T09:32:02.406867085Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:02.406909Z", + "start_time": "2025-12-23T09:32:02.407049Z", + "end_time": "2025-12-23T09:32:02.472305Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:02.406647177Z", + "start_time": "2025-12-23T09:32:02.406715979Z", + "end_time": "2025-12-23T09:32:02.406853185Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:02.406662977Z", + "start_time": "2025-12-23T09:32:02.40673218Z", + "end_time": "2025-12-23T09:32:02.406822883Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, 
+ "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3901, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/0002b6c22d2efa3af4971f9d515178bb89e31934.json b/data/output/0002b6c22d2efa3af4971f9d515178bb89e31934.json new file mode 100644 index 0000000..7825974 --- /dev/null +++ b/data/output/0002b6c22d2efa3af4971f9d515178bb89e31934.json @@ -0,0 +1,314 @@ +{ + "file_name": "0002b6c22d2efa3af4971f9d515178bb89e31934.txt", + "total_words": 534, + "top_n_words": [ + { + "word": "the", + "count": 25 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "maden", + "count": 13 + }, + { + "word": "to", + "count": 11 + }, + { + "word": "his", + "count": 9 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "on", + "count": 9 + }, + { + "word": "with", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "chest area.", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Mark Duell .", + "length": 12 + }, + { + "text": "striped polo T-shirt.", + "length": 21 + }, + { + "text": "Maden had claimed he had .", + "length": 26 + }, + { + "text": "13:59 EST, 14 March 2013 .", + "length": 26 + }, + { + "text": "13:06 EST, 14 March 
2013 .", + "length": 26 + }, + { + "text": "gun with the barrel exposed.", + "length": 28 + }, + { + "text": "But officers examined it and found .", + "length": 36 + }, + { + "text": "DC Gary Kelly of Greater Manchester .", + "length": 37 + }, + { + "text": "Another photo showed him holding the .", + "length": 38 + }, + { + "text": "The incriminating photo was found when .", + "length": 40 + }, + { + "text": "Police matched the incriminating photo to .", + "length": 43 + }, + { + "text": "police arrested Maden and seized his iPhone.", + "length": 44 + }, + { + "text": "others of Maden dressed in the same polo shirt.", + "length": 47 + }, + { + "text": "ammunition ranging from 9mm casings to shotgun cartridges.", + "length": 58 + }, + { + "text": "just bought the phone earlier in the year and had the memory wiped.", + "length": 67 + }, + { + "text": "of Maden wearing a polo shirt that had a unique pattern across the .", + "length": 68 + }, + { + "text": "various pictures of Maden posing with the firearm whilst wearing a red .", + "length": 72 + }, + { + "text": "face covering and standing in front of a mirror dressed in a distinctive .", + "length": 74 + }, + { + "text": "Police said after the case: ‘When we examined the phone, we found images .", + "length": 76 + }, + { + "text": "He also pleaded guilty to separate charges of being concerning in the supply of Class C drugs.", + "length": 94 + }, + { + "text": "Dangerous weapon: This picture shows a knife with 19-year-old Marcel Maden's name engraved on the blade .", + "length": 105 + }, + { + "text": "Officers also seized a 12-bore double-barrelled shotgun with two magazine clips, and a plastic bag of live .", + "length": 108 + }, + { + "text": "Now and then: Maden is seen (left) in his police mugshot and (right) in another of his photos posing as a model .", + "length": 113 + }, + { + "text": "‘I would like to stress that the information you supply to us will be treated with the strictest of confidence.", + 
"length": 113 + }, + { + "text": "'Thankfully, these dangerous weapons have now been taken off our streets' DC Gary KellyGreater Manchester Police .", + "length": 114 + }, + { + "text": "‘When we examined further photos on Maden's phone, we discovered the same t-shirt was being worn on each occasion.", + "length": 116 + }, + { + "text": "Marcel Maden, of Salford, Greater Manchester, took photos of himself playing with a Smith \u0026 Wesson revolver in the mirror - before storing them on his phone.", + "length": 157 + }, + { + "text": "Police raided a house linked to the 19-year-old after a tip off last October and discovered an arsenal of weapons stored under the floorboards, including three handguns.", + "length": 169 + }, + { + "text": "‘Thankfully, these dangerous weapons have now been taken off our streets and I would ask anyone who may have information as to where guns and ammunition may be stored to contact us.", + "length": 183 + }, + { + "text": "A teenage model was today jailed for seven years after being unmasked as an underworld gunman when police caught him keeping pictures of himself posing with a loaded pistol on his iPhone.", + "length": 187 + }, + { + "text": "‘That, alongside him having the same physical appearance and wearing the same watch as Maden as well as the photographer holding the same iPhone was enough evidence to put him behind bars.", + "length": 190 + }, + { + "text": "Maden, who claims on his Facebook page to be ‘just a hard working person’ admitted possessing prohibited weapons at Manchester Crown Court and was jailed for seven years and eight months.", + "length": 191 + }, + { + "text": "Posing: Maden, who claims on his Facebook page to be ‘just a hard working person’ admitted possessing prohibited weapons at Manchester Crown Court and was jailed for seven years and eight months .", + "length": 200 + }, + { + "text": "Caught on camera: Marcel Maden, of Salford, Greater Manchester, was trapped by his own vanity after he took photos 
of himself playing with a Smith \u0026 Wesson revolver in the mirror - before storing them on his phone .", + "length": 215 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.45611539483070374 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:02.907095683Z", + "first_section_created": "2025-12-23T09:32:02.907466098Z", + "last_section_published": "2025-12-23T09:32:02.907666606Z", + "all_results_received": "2025-12-23T09:32:02.97728549Z", + "output_generated": "2025-12-23T09:32:02.977473698Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:02.907466098Z", + "publish_time": "2025-12-23T09:32:02.907666606Z", + "first_worker_start": "2025-12-23T09:32:02.908232529Z", + "last_worker_end": "2025-12-23T09:32:02.976361Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:02.908157226Z", + "start_time": "2025-12-23T09:32:02.908232529Z", + "end_time": "2025-12-23T09:32:02.908290531Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:02.908389Z", + "start_time": "2025-12-23T09:32:02.908549Z", + "end_time": "2025-12-23T09:32:02.976361Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:02.908211128Z", + "start_time": "2025-12-23T09:32:02.908302231Z", + "end_time": "2025-12-23T09:32:02.908439437Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:02.908190327Z", + "start_time": "2025-12-23T09:32:02.908280731Z", + "end_time": "2025-12-23T09:32:02.908349233Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3175, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/0002c17436637c4fe1837c935c04de47adb18e9a.json b/data/output/0002c17436637c4fe1837c935c04de47adb18e9a.json new file mode 100644 index 0000000..27b1c03 --- /dev/null +++ b/data/output/0002c17436637c4fe1837c935c04de47adb18e9a.json @@ -0,0 +1,376 @@ +{ + "file_name": "0002c17436637c4fe1837c935c04de47adb18e9a.txt", + "total_words": 951, + "top_n_words": [ + { + "word": "the", + "count": 43 + }, + { + "word": "in", + "count": 39 + }, + { + "word": "a", + "count": 29 + }, + { + "word": "of", + "count": 21 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "burkhart", + "count": 20 + }, + { + "word": "to", + "count": 18 + }, + { + "word": "said", + "count": 17 + }, + { + "word": "was", + "count": 15 + }, + { + "word": 
"fires", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "Dr.", + "length": 3 + }, + { + "text": "Marshals,\" Nordskog wrote.", + "length": 26 + }, + { + "text": "85 million bond and surrender his German passport.", + "length": 50 + }, + { + "text": "She declined to cite the case or Burkhart's role in it.", + "length": 55 + }, + { + "text": "To go free while awaiting trial, Burkhart must post a $2.", + "length": 57 + }, + { + "text": "Meanwhile, both Burkharts are housed in a Los Angeles jail.", + "length": 59 + }, + { + "text": "Burkhart was in court on Wednesday for a preliminary hearing.", + "length": 61 + }, + { + "text": "The district attorney called his courtroom behavior \"very bizarre.", + "length": 66 + }, + { + "text": "Shortly thereafter, the defendant was ejected from the courtroom by Deputy U.", + "length": 77 + }, + { + "text": "' The defendant also attempted to communicate with his mother who was in custody.", + "length": 81 + }, + { + "text": "\"If found true, the allegation could mean additional custody time for the defendant.", + "length": 84 + }, + { + "text": "\"The people believe he has engaged in this conduct because he has a hatred for Americans.", + "length": 89 + }, + { + "text": "\" The diagnosis was spelled out in a letter she wrote for the small-claims court case, Stancheva said.", + "length": 102 + }, + { + "text": "\"While in the audience, the defendant (Burkhart) began yelling in an angry manner, 'F--k all Americans.", + "length": 103 + }, + { + "text": "The 53-year-old German woman is wanted on 16 counts of fraud and three counts of embezzlement, he said.", + "length": 103 + }, + { + "text": "No one was hurt in the fires, but property damage costs are likely to reach $3 million, authorities said.", + "length": 105 + }, + { + "text": "\"The vast majority of these fires occurred late at night when the occupants of the apartment buildings were asleep.", + "length": 115 + }, + { + 
"text": "Burkhart kept his eyes closed and remained limp during most of his hearing, requiring sheriff's deputies to hold him up.", + "length": 120 + }, + { + "text": "Burkhart, a 24-year-old German national, has been charged with 37 counts of arson following a string of 52 fires in Los Angeles.", + "length": 128 + }, + { + "text": "\"I was shocked and dismayed at what happened in Los Angeles, and it appears he was not being treated for his depression,\" she said.", + "length": 131 + }, + { + "text": "Burkhart's mother is scheduled for another extradition hearing Friday, while he is due back in court for arraignment on January 24.", + "length": 131 + }, + { + "text": "The charges include an allegation that she failed to pay for a breast enhancement operation performed on her in 2004, Meilinger said.", + "length": 133 + }, + { + "text": "Most of the German charges, however, stem from phony real estate deals that Dorothee Burkhart allegedly conducted between 2000 and 2006.", + "length": 136 + }, + { + "text": "It was revealed that Burkhart is also under investigation for arson and fraud in relation to a fire in Neukirchen, near Frankfurt, Germany.", + "length": 139 + }, + { + "text": "The criminal complaint filed Wednesday also alleged that the fires were \"caused by use of a device designed to accelerate the fire,\" Cooley said.", + "length": 145 + }, + { + "text": "Stancheva, citing doctor-patient confidentiality, would not elaborate further, nor would she identify the psychiatrist involved in the diagnosis.", + "length": 145 + }, + { + "text": "The charges are in connection with arson fires at 12 locations scattered through Hollywood, West Hollywood and Sherman Oaks, according to authorities.", + "length": 150 + }, + { + "text": "Stancheva said she and other doctors including a psychiatrist diagnosed Burkhart with \"autism, severe anxiety, post-traumatic stress disorder and depression.", + "length": 157 + }, + { + "text": "Stancheva said the refugee applications 
by Burkhart and his mother were denied by the Canadian government, and she has not seen Burkhart since early March of 2010.", + "length": 163 + }, + { + "text": "\"I was asked to diagnose and treat Harry to support a claim explaining why he was unable to show up in a small-claims court case,\" Stancheva told CNN in a phone interview.", + "length": 171 + }, + { + "text": "Dorothee Burkhart was arrested a day before on an international arrest warrant issued by a district court in Frankfurt, Germany, said federal court spokesman Gunther Meilinger.", + "length": 176 + }, + { + "text": "\"It is my opinion based on my experience that it is highly likely the defendant has a history of setting arson fires in Germany before he came to the United States,\" Nordskog wrote.", + "length": 181 + }, + { + "text": "\" Investigator Edward Nordskog's affidavit detailed Burkhart's behavior a day before the fires began, when he was in a federal courtroom during extradition proceedings for his mother.", + "length": 183 + }, + { + "text": "Prosecutors said his \"rage against Americans,\" triggered by his mother's arrest last week, motivated his \"campaign of terror\" with dozens of fires in Hollywood and nearby communities.", + "length": 183 + }, + { + "text": "\" Carney told the court Burkhart would flee the country if he was allowed out of jail on bond, but Los Angeles Superior Court Judge Upinder Kalra said he had no choice but to set bail.", + "length": 184 + }, + { + "text": "Cooley called it \"almost attempted murder,\" because people were sleeping in apartments above where Burkhart allegedly set cars on fire with incendiary devices placed under their engines.", + "length": 186 + }, + { + "text": "Blaga Stancheva, a family physician and specialist in obstetrics, said both Burkhart and his mother, Dorothee, were her patients in Vancouver while both were applying for refugee status in Canada.", + "length": 196 + }, + { + "text": "\" \"In numerous instances, the cars were parked in 
carports, resulting in the fires spreading to the adjacent occupied apartment buildings,\" a sworn affidavit from a Los Angeles arson investigator said.", + "length": 201 + }, + { + "text": "A search of Burkhart's Hollywood apartment found newspaper clippings about the Los Angeles fires and articles from Germany reporting similar car fires in Frankfurt, Germany in September, 2011, the investigator said.", + "length": 215 + }, + { + "text": "Los Angeles (CNN) -- A medical doctor in Vancouver, British Columbia, said Thursday that California arson suspect Harry Burkhart suffered from severe mental illness in 2010, when she examined him as part of a team of doctors.", + "length": 225 + }, + { + "text": "that the defendant's criminal spree was motivated by his rage against Americans and that by setting these fires the defendant intended to harm and terrorize as many residents of the city and county of Los Angeles as possible,\" Nordskog wrote.", + "length": 242 + }, + { + "text": "The worst arson sprees in the city's history began last Friday morning with a car fire in Hollywood that spread to apartments above a garage, but no new fires have happened since Burkhart was arrested Monday, Los Angeles District Attorney Steve Cooley said.", + "length": 257 + }, + { + "text": "\" \"This defendant has engaged in a protracted campaign in which he has set, the people believe, upwards of 52 arson fires in what essentially amounts to a campaign of terror against this community,\" Los Angeles County Deputy District Attorney Sean Carney said.", + "length": 260 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.6922233402729034 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:03.408432131Z", + "first_section_created": "2025-12-23T09:32:03.408833647Z", + "last_section_published": "2025-12-23T09:32:03.409223963Z", + "all_results_received": "2025-12-23T09:32:03.49067922Z", + "output_generated": "2025-12-23T09:32:03.490983132Z", + 
"total_processing_time_ms": 82, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 81, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:03.408833647Z", + "publish_time": "2025-12-23T09:32:03.409068156Z", + "first_worker_start": "2025-12-23T09:32:03.409594778Z", + "last_worker_end": "2025-12-23T09:32:03.48965Z", + "total_journey_time_ms": 80, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:03.409583977Z", + "start_time": "2025-12-23T09:32:03.40965828Z", + "end_time": "2025-12-23T09:32:03.409737083Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:03.409936Z", + "start_time": "2025-12-23T09:32:03.410079Z", + "end_time": "2025-12-23T09:32:03.48965Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 79 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:03.40966088Z", + "start_time": "2025-12-23T09:32:03.409725283Z", + "end_time": "2025-12-23T09:32:03.40990289Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:03.409542875Z", + "start_time": "2025-12-23T09:32:03.409594778Z", + "end_time": "2025-12-23T09:32:03.409669181Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:03.409110358Z", + "publish_time": "2025-12-23T09:32:03.409223963Z", + "first_worker_start": "2025-12-23T09:32:03.409594978Z", + "last_worker_end": "2025-12-23T09:32:03.481297Z", + "total_journey_time_ms": 72, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:03.40966578Z", + "start_time": "2025-12-23T09:32:03.409716982Z", + "end_time": "2025-12-23T09:32:03.409738783Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:03.409907Z", + "start_time": "2025-12-23T09:32:03.410062Z", + "end_time": "2025-12-23T09:32:03.481297Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 71 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:03.409679681Z", + "start_time": "2025-12-23T09:32:03.409714582Z", + "end_time": "2025-12-23T09:32:03.409746484Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:03.409544776Z", + "start_time": "2025-12-23T09:32:03.409594978Z", + "end_time": "2025-12-23T09:32:03.409607878Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 150, + "min_processing_ms": 71, + "max_processing_ms": 79, + "avg_processing_ms": 75, + "median_processing_ms": 79, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2914, + "slowest_section_id": 0, + "slowest_section_time_ms": 80 + } +} diff --git 
a/data/output/00030c0cba08a5b361fd3ecb4dbf232da8269c55.json b/data/output/00030c0cba08a5b361fd3ecb4dbf232da8269c55.json new file mode 100644 index 0000000..044cad4 --- /dev/null +++ b/data/output/00030c0cba08a5b361fd3ecb4dbf232da8269c55.json @@ -0,0 +1,286 @@ +{ + "file_name": "00030c0cba08a5b361fd3ecb4dbf232da8269c55.txt", + "total_words": 382, + "top_n_words": [ + { + "word": "in", + "count": 15 + }, + { + "word": "the", + "count": 14 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "to", + "count": 9 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "and", + "count": 6 + }, + { + "word": "is", + "count": 6 + }, + { + "word": "trust", + "count": 6 + }, + { + "word": "by", + "count": 5 + }, + { + "word": "hedgehog", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Graham Smith .", + "length": 14 + }, + { + "text": "05:32 EST, 23 June 2012 .", + "length": 25 + }, + { + "text": "06:02 EST, 23 June 2012 .", + "length": 25 + }, + { + "text": "are an endangered animal.", + "length": 25 + }, + { + "text": "because of people not looking.", + "length": 30 + }, + { + "text": "People must look in the garden.", + "length": 31 + }, + { + "text": "'By 2025 we might not have hedgehogs .", + "length": 38 + }, + { + "text": "Mrs Roberts, who opened the hospital .", + "length": 38 + }, + { + "text": "'I've got 23 babies that are all here .", + "length": 39 + }, + { + "text": "been gradually weaned off human contact.", + "length": 40 + }, + { + "text": "I want people to be aware because they .", + "length": 40 + }, + { + "text": "Her mother was injured when a lawnmower accidentally ran her over.", + "length": 66 + }, + { + "text": "any more thanks to the dangers of roads, lawnmowers and pesticides.", + "length": 67 + }, + { + "text": "in 2001, said none of the hedgehogs will be 
released until they have .", + "length": 70 + }, + { + "text": "She said: 'When they roll into a ball on contact we know they are ready to go.", + "length": 78 + }, + { + "text": "All of them, including the amputees, will eventually be released back into the wild.", + "length": 84 + }, + { + "text": "She said: 'A lot of us haven't mowed our lawns because of the weather and people don't look.", + "length": 92 + }, + { + "text": "Owner Barbara Roberts, 60, has now issued a plea to householders tackling their summer lawns.", + "length": 93 + }, + { + "text": "She is now being nursed back to health alongside 22 hedgehogs others left injured in gardening accidents.", + "length": 105 + }, + { + "text": "Honey is now being nursed back to health alongside 22 hedgehogs others left injured in gardening accidents .", + "length": 108 + }, + { + "text": "The trust names each creature that comes in and recent intakes include Willow, Charlotte, Basil, Brad and Roger.", + "length": 112 + }, + { + "text": "Some of Ms Roberts' patients have been rescued after being abandoned by terrified mothers, while 11 of them are amputees.", + "length": 121 + }, + { + "text": "This two-day-old baby hedgehog is in a prickly situation as she waits for her mother to recover from a gardening accident.", + "length": 122 + }, + { + "text": "In the last two weeks the trust has seen a surge in new admissions, including several amputees, thanks to strimming accidents.", + "length": 126 + }, + { + "text": "Making friends: The trust names each creature that comes in and recent intakes include Willow, Charlotte, Basil, Brad and Roger .", + "length": 129 + }, + { + "text": "The newborn, called Honey, is in safe hands as she is looked after by animal welfare officers at Withington Hedgehog Hospital Trust in Manchester.", + "length": 146 + }, + { + "text": "Sanctuary: In the last two weeks the hedgehog trust has seen a surge in new admissions, including several amputees, thanks to strimming accidents .", + 
"length": 147 + }, + { + "text": "Newborn: Two-day-old hedgehog Honey is being looked after by animal welfare officers at Withington Hedgehog Hospital Trust in Manchester after her mother was injured in a lawnmower accident .", + "length": 191 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4658172130584717 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:03.910021389Z", + "first_section_created": "2025-12-23T09:32:03.911871763Z", + "last_section_published": "2025-12-23T09:32:03.912064571Z", + "all_results_received": "2025-12-23T09:32:03.982264978Z", + "output_generated": "2025-12-23T09:32:03.982407884Z", + "total_processing_time_ms": 72, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 70, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:03.911871763Z", + "publish_time": "2025-12-23T09:32:03.912064571Z", + "first_worker_start": "2025-12-23T09:32:03.912685196Z", + "last_worker_end": "2025-12-23T09:32:03.981273Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:03.912668795Z", + "start_time": "2025-12-23T09:32:03.912742698Z", + "end_time": "2025-12-23T09:32:03.9127879Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:03.912846Z", + "start_time": "2025-12-23T09:32:03.912981Z", + "end_time": "2025-12-23T09:32:03.981273Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 68 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:03.912623893Z", + "start_time": "2025-12-23T09:32:03.912685196Z", + "end_time": "2025-12-23T09:32:03.912820901Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:03.912600392Z", + "start_time": 
"2025-12-23T09:32:03.912687996Z", + "end_time": "2025-12-23T09:32:03.912734497Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 68, + "min_processing_ms": 68, + "max_processing_ms": 68, + "avg_processing_ms": 68, + "median_processing_ms": 68, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2263, + "slowest_section_id": 0, + "slowest_section_time_ms": 69 + } +} diff --git a/data/output/0003ad6ef0c37534f80b55b4235108024b407f0b.json b/data/output/0003ad6ef0c37534f80b55b4235108024b407f0b.json new file mode 100644 index 0000000..f7b5e91 --- /dev/null +++ b/data/output/0003ad6ef0c37534f80b55b4235108024b407f0b.json @@ -0,0 +1,222 @@ +{ + "file_name": "0003ad6ef0c37534f80b55b4235108024b407f0b.txt", + "total_words": 291, + "top_n_words": [ + { + "word": "the", + "count": 14 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "was", + "count": 8 + }, + { + "word": "rape", + "count": 7 + }, + { + "word": "not", + "count": 6 + }, + { + "word": "of", + "count": 6 + }, 
+ { + "word": "and", + "count": 5 + }, + { + "word": "as", + "count": 5 + }, + { + "word": "with", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "Mark Gagan.", + "length": 11 + }, + { + "text": "He did not wear a protective vest.", + "length": 34 + }, + { + "text": "Montano, who was held in lieu of $1.", + "length": 36 + }, + { + "text": "The court described Morales as younger than 16, and did not give an age for Peter.", + "length": 82 + }, + { + "text": "Four other teenage suspects were arraigned Thursday on charges connected to the rape.", + "length": 85 + }, + { + "text": "The victim was taken to the hospital in critical condition, but was released Wednesday.", + "length": 87 + }, + { + "text": "All three juveniles, who wore bulletproof vests at the hearing, were charged as adults.", + "length": 87 + }, + { + "text": "A fourth person, Manuel Ortega, 19, appeared separately without an attorney and did not enter a plea.", + "length": 101 + }, + { + "text": "Another person, Salvador Rodriguez, 21, was arrested Tuesday night, but he was not in court Thursday.", + "length": 101 + }, + { + "text": "3 million bail, is accused of taking part in what police said was a 2½-hour assault on the Richmond High School campus.", + "length": 120 + }, + { + "text": "Cody Ray Smith, described by the court as older than 14, pleaded not guilty to charges of rape with a foreign object and rape by force.", + "length": 135 + }, + { + "text": "Montano was arrested Thursday evening in San Pablo, California, a small town about two miles from the city of Richmond, where the crime took place.", + "length": 147 + }, + { + "text": "Police said as many as 10 people were involved in the rape in a dimly lit back alley at the school, while another 10 people watched without calling 911.", + "length": 152 + }, + { + "text": "Jose Carlos Montano, 18, was arrested on charges of felony rape, rape in concert with force, and penetration with a foreign object, said Richmond Police Lt.", + 
"length": 156 + }, + { + "text": "Two other juveniles, Ari Abdallah Morales and Marcelles James Peter, appeared with Smith at the Contra Costa County Superior Court, but did not enter a plea.", + "length": 157 + }, + { + "text": "(CNN) -- Police arrested another teen Thursday, the sixth suspect jailed in connection with the gang rape of a 15-year-old girl on a northern California high school campus.", + "length": 172 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6561007499694824 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:04.412621187Z", + "first_section_created": "2025-12-23T09:32:04.412882898Z", + "last_section_published": "2025-12-23T09:32:04.413100607Z", + "all_results_received": "2025-12-23T09:32:04.477488781Z", + "output_generated": "2025-12-23T09:32:04.477598486Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:04.412882898Z", + "publish_time": "2025-12-23T09:32:04.413100607Z", + "first_worker_start": "2025-12-23T09:32:04.413558825Z", + "last_worker_end": "2025-12-23T09:32:04.476571Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:04.413547424Z", + "start_time": "2025-12-23T09:32:04.413604227Z", + "end_time": "2025-12-23T09:32:04.413637928Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:04.41373Z", + "start_time": "2025-12-23T09:32:04.413869Z", + "end_time": "2025-12-23T09:32:04.476571Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:04.413574325Z", + "start_time": "2025-12-23T09:32:04.413634728Z", + "end_time": "2025-12-23T09:32:04.413705331Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:04.413500723Z", + "start_time": "2025-12-23T09:32:04.413558825Z", + "end_time": "2025-12-23T09:32:04.413593726Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1677, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/0003b7e88347128efada6f9b75f7f1bac237dee8.json b/data/output/0003b7e88347128efada6f9b75f7f1bac237dee8.json new file mode 100644 index 0000000..59f7c1b --- /dev/null +++ b/data/output/0003b7e88347128efada6f9b75f7f1bac237dee8.json @@ -0,0 +1,408 @@ +{ + "file_name": "0003b7e88347128efada6f9b75f7f1bac237dee8.txt", + "total_words": 964, + "top_n_words": [ + { + "word": "the", + "count": 58 + }, + { + "word": "to", + "count": 36 + }, + { + "word": "a", + 
"count": 28 + }, + { + "word": "of", + "count": 28 + }, + { + "word": "in", + "count": 23 + }, + { + "word": "pigeons", + "count": 22 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "i", + "count": 20 + }, + { + "word": "abian", + "count": 16 + }, + { + "word": "for", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "'I've .", + "length": 7 + }, + { + "text": "'There are no restrictions in my work.", + "length": 38 + }, + { + "text": "a bird that's not cooped up in some zoo.", + "length": 40 + }, + { + "text": "In my eyes, it's the ultimate way to go!", + "length": 40 + }, + { + "text": "Many city locations, such as Trafalgar .", + "length": 40 + }, + { + "text": "A pigeon has broken free from the flock .", + "length": 41 + }, + { + "text": "Close up: pigeons have become used to the city traffic .", + "length": 56 + }, + { + "text": "I would never save a pigeon from the clutches of a raptor.", + "length": 58 + }, + { + "text": "'They form awesome, defensive flocks when hawks fly overhead.", + "length": 61 + }, + { + "text": "'My favorite type of photo is one that exudes motion,' he said.", + "length": 63 + }, + { + "text": "Fly like a plane: 'Pigeons are exceptional fliers,' adds Abian.", + "length": 63 + }, + { + "text": "'I have now become intimately acquainted with the local pigeons.", + "length": 64 + }, + { + "text": "It's difficult to catch them in motion and I enjoy the challenge.", + "length": 65 + }, + { + "text": "'My favorite type of photo is one that exudes motion,' Abian says.", + "length": 66 + }, + { + "text": "York alone is believed to be home to more than one million pigeons.", + "length": 67 + }, + { + "text": "Photographer Abian has dedicated his life to picturing the pigeon .", + "length": 67 + }, + { + "text": "had people interrupt me to ask what I'm photographing in an accusatory .", + "length": 72 + }, + { + "text": "Square in London, are famous for their large pigeon populations and New .", + "length": 73 + }, + { + 
"text": "I would never save a pigeon from the clutches of a raptor,' Abian added .", + "length": 73 + }, + { + "text": "manner because they can't fathom why anybody would be taking pictures of .", + "length": 74 + }, + { + "text": "'This was huge because it enabled me to start catching the birds in flight.", + "length": 75 + }, + { + "text": "Taking the plunge: A pigeon jumps from a window ledge towards the traffic below .", + "length": 81 + }, + { + "text": "' Abian now uses a D700 along with a Nikon 300mm f4 lens for his pigeon pictures.", + "length": 81 + }, + { + "text": "I was shocked that these predators were in such an urban setting and I needed proof.", + "length": 84 + }, + { + "text": "Soft landing: Parks and city squares are ideal for pigeons who pick up food crumbs .", + "length": 84 + }, + { + "text": "One in a million: New York alone is believed to be home to more than one million pigeons .", + "length": 90 + }, + { + "text": "Feathers flying: two pigeons fight for air space or sort out another matter between them .", + "length": 90 + }, + { + "text": "'They're graceful creatures and one can even begin to envy the pigeon's free-flowing lifestyle.", + "length": 95 + }, + { + "text": "'I would absolutely relish the opportunity to photograph pigeons in such a lovely city as London.", + "length": 97 + }, + { + "text": "'I've also had the cops called on me on three separate occasions while I was birding on my own block.", + "length": 101 + }, + { + "text": "He said: 'I began snapping photos of birds of prey in my neighborhood with a cellphone and binoculars.", + "length": 102 + }, + { + "text": "Flying low: 'I was shocked that these predators were in such an urban setting and I needed proof,' adds Abian .", + "length": 111 + }, + { + "text": "Abian, 27, from the Bronx, New York, US, said: 'Pigeons everywhere deserve their metaphorical moment in the sun.", + "length": 112 + }, + { + "text": "Thinking about it: ledges of buildings are a substitute for sea 
cliffs for pigeons who have adapted to urban life .", + "length": 115 + }, + { + "text": "Many city locations, such as Trafalgar Square in London and New York, are famous for their large pigeon populations .", + "length": 117 + }, + { + "text": "'They are constantly on the look out for predators and must cooperate with each other to stay on top of these threats .", + "length": 119 + }, + { + "text": "' With more than 500 finished pictures of the birds, photographer Abian hopes to show them off to people around the world .", + "length": 123 + }, + { + "text": "'I would absolutely relish the opportunity to photograph pigeons in such a lovely city as London' 'There are no restrictions in my work.", + "length": 136 + }, + { + "text": "Pigeons in Trafalgar Square were considered a tourist attraction with street vendors selling packets of seeds for visitors to feed them .", + "length": 137 + }, + { + "text": "' From the Bronx: New York Photographer Abian Sacks, pictured, believes the birds to be the cream of the crop and admires their lifestyle .", + "length": 139 + }, + { + "text": "'Pigeons are exceptional fliers, they are constantly on the look out for predators and must cooperate with each other to stay on top of these threats.", + "length": 150 + }, + { + "text": "'How different is a line of pigeons sunbathing on top of a building from the morning commuters on the subway platform waiting for the train to arrive?", + "length": 150 + }, + { + "text": "Abian has dedicated his life to picturing the pigeon, and with more than 500 finished pictures of the birds, he hopes to show them off to people around the world.", + "length": 162 + }, + { + "text": "For many years, the pigeons in Trafalgar Square were considered a tourist attraction, with street vendors selling packets of seeds for visitors to feed the pigeons.", + "length": 164 + }, + { + "text": "'I like when the pigeons dip below the horizon, this way you have the nice contrast of the crisp bird in focus with the 
colorful, dreamy bokeh [blur] of the backdrop.", + "length": 166 + }, + { + "text": "'I like when the pigeons dip below the horizon, this way you have the nice contrast of the crisp bird in focus with the colorful, dreamy bokeh of the backdrop,' adds Abian .", + "length": 173 + }, + { + "text": "While many consider pigeons to be 'winged rats' and urban vermin, Abian believes the birds to be the cream of the crop and he even goes as far to say he admires their lifestyle.", + "length": 177 + }, + { + "text": "An urban wildlife photographer with a passion for the pigeon has illustrated his love for his feathered friends through a collection of more than 500 up close and personal pictures.", + "length": 181 + }, + { + "text": "' Feral pigeons find the ledges of buildings to be a substitute for sea cliffs, have become adapted to urban life, and are abundant in towns and cities throughout much of the world.", + "length": 181 + }, + { + "text": "'After a few months of scouting perched hawks from my window and rushing into the winter chill to take dozens of the same photo, I upgraded to an entry level DSLR and a 150-500mm zoom lens.", + "length": 189 + }, + { + "text": "Abian Sacks goes to every extreme to catch the city birds in their most natural environments, climbing up fire escapes or scaling the tops of the highest buildings in a bid to get the best shots.", + "length": 195 + }, + { + "text": "Photographer Abian Sacks, 27: 'They're graceful creatures and one can even begin to envy the pigeon's free-flowing lifestyle' Photographer Abian adds: 'I think that pigeons are a lot like us - big city dwellers' 'I think that pigeons are a lot like us - big city dwellers,' said Abian.", + "length": 285 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.39290615916252136 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:04.913269708Z", + "first_section_created": "2025-12-23T09:32:04.915179984Z", + 
"last_section_published": "2025-12-23T09:32:04.915615001Z", + "all_results_received": "2025-12-23T09:32:05.015160082Z", + "output_generated": "2025-12-23T09:32:05.01534729Z", + "total_processing_time_ms": 102, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 99, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:04.915179984Z", + "publish_time": "2025-12-23T09:32:04.915444595Z", + "first_worker_start": "2025-12-23T09:32:04.915930914Z", + "last_worker_end": "2025-12-23T09:32:04.9887Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:04.916030718Z", + "start_time": "2025-12-23T09:32:04.916098821Z", + "end_time": "2025-12-23T09:32:04.916202725Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:04.916128Z", + "start_time": "2025-12-23T09:32:04.916248Z", + "end_time": "2025-12-23T09:32:04.9887Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:04.915967816Z", + "start_time": "2025-12-23T09:32:04.916035718Z", + "end_time": "2025-12-23T09:32:04.916203425Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:04.915846411Z", + "start_time": "2025-12-23T09:32:04.915930914Z", + "end_time": "2025-12-23T09:32:04.916036018Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:04.915515397Z", + "publish_time": "2025-12-23T09:32:04.915615001Z", + "first_worker_start": "2025-12-23T09:32:04.916042019Z", + "last_worker_end": "2025-12-23T09:32:05.014289Z", + "total_journey_time_ms": 98, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": 
"2025-12-23T09:32:04.91607462Z", + "start_time": "2025-12-23T09:32:04.916125322Z", + "end_time": "2025-12-23T09:32:04.916140022Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:04.916309Z", + "start_time": "2025-12-23T09:32:04.91644Z", + "end_time": "2025-12-23T09:32:05.014289Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 97 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:04.916022318Z", + "start_time": "2025-12-23T09:32:04.916064419Z", + "end_time": "2025-12-23T09:32:04.916096621Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:04.915964715Z", + "start_time": "2025-12-23T09:32:04.916042019Z", + "end_time": "2025-12-23T09:32:04.916058219Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 169, + "min_processing_ms": 72, + "max_processing_ms": 97, + "avg_processing_ms": 84, + "median_processing_ms": 97, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2718, + "slowest_section_id": 1, + "slowest_section_time_ms": 98 + } +} diff --git a/data/output/0003f6d029bce7b5b4fa6cda724d9035bf7d50fe.json b/data/output/0003f6d029bce7b5b4fa6cda724d9035bf7d50fe.json new file mode 100644 index 0000000..1fd2f38 --- /dev/null +++ b/data/output/0003f6d029bce7b5b4fa6cda724d9035bf7d50fe.json @@ -0,0 +1,322 @@ +{ + "file_name": "0003f6d029bce7b5b4fa6cda724d9035bf7d50fe.txt", + "total_words": 471, + "top_n_words": [ + { + "word": "the", + "count": 48 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "million", + "count": 14 + }, + { + "word": "for", + "count": 12 + }, + { + "word": "property", + "count": 12 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "road", + "count": 9 + }, + { + "word": "toorak", + "count": 8 + }, + { + "word": "year", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "3.", + "length": 2 + }, + { + "text": "4.", + "length": 2 + }, + { + "text": "1.", + "length": 2 + }, + { + "text": "2.", + "length": 2 + }, + { + "text": "5.", + "length": 2 + }, + { + "text": "2 million .", + "length": 11 + }, + { + "text": "2 million .", + "length": 11 + }, + { + "text": "8 million .", + "length": 11 + }, + { + "text": "55 million= .", + "length": 13 + }, + { + "text": "Towers Road, Toorak, $18.", + "length": 25 + }, + { + "text": "7 million – OFF MARKET .", + "length": 26 + }, + { + "text": "The Smorgon family paid $3.", + "length": 27 + }, + { + "text": "The Smorgon family paid $3.", + "length": 27 + }, + { + "text": "2 million, reports The Age.", + "length": 28 + }, + { + "text": "05 million for the property in 1995 .", + "length": 37 + }, + { + "text": "18-20 Shakespeare Grove, Hawthorn, $19.", + "length": 39 + }, + { + "text": "750 Orrong Road, Toorak, speculated $19.", + "length": 40 + }, + { + "text": "4 St Georges Road, Toorak, speculated $23.", + "length": 42 + }, + { + 
"text": "7 million for 14 St Georges Road, in the same suburb of Toorak.", + "length": 63 + }, + { + "text": "7 million for 14 St Georges Road, in the same suburb of Toorak .", + "length": 64 + }, + { + "text": "3464-3468 Point Nepean Road, Sorrento $19 million – OFF MARKET .", + "length": 66 + }, + { + "text": "The property boasts both formal and family living and dining rooms .", + "length": 68 + }, + { + "text": "The gymnasium is situated beside a cinema room, games room, large living area .", + "length": 79 + }, + { + "text": "The sale comes in the same week as the top sale of the year for Victoria, the $23.", + "length": 82 + }, + { + "text": "The sale comes in the same week as the top sale of the year for Victoria, the $23.", + "length": 82 + }, + { + "text": "The monumental alfresco terrace gazes to the north onto an expansive garden sanctuary .", + "length": 87 + }, + { + "text": "The company was created by Eric Smorgman, Normans grandfather, reports Property Observer.", + "length": 89 + }, + { + "text": "The property was owned by Norman and Tania Smorgon, the heirs to the affluent Smorgon family .", + "length": 94 + }, + { + "text": "Norman Smorgon is the head of Escor, the private company that handles the family’s finances.", + "length": 94 + }, + { + "text": "Outside, an Allan Powell designed pavilion overlooks the north and south floodlit tennis court .", + "length": 96 + }, + { + "text": "The luxury abode, which offers oak flooring and a subfloor cellar, was on the market for over a year .", + "length": 102 + }, + { + "text": "A family in Melbourne have sold their domestic property for just under $20 million, after a year on the market.", + "length": 111 + }, + { + "text": "The six bedroom, seven bathroom abode was the third most expensive private property sale of the year in Victoria .", + "length": 114 + }, + { + "text": "The entrance to the luxurious 750 Orrong Road Toorak, southeast Melbourne, which has just sold for a speculated $19.", + "length": 
117 + }, + { + "text": "Toorak, located in southeast Melbourne, is home to three of the top five biggest Victorian property sales of the year.", + "length": 118 + }, + { + "text": "The 5,500sqm/59,000sqft property offers an outstanding marble kitchen, fit with a Gaggenau walk-in commercial fridge .", + "length": 118 + }, + { + "text": "Eric Smorgman migrated from Russia to Melbourne in 1926, in a bid to escape the anti-Semitic movement headed by Stalin.", + "length": 119 + }, + { + "text": "05 million for the property in 1995, which was then owned by the family of prominent Polish clothing manufacturer Abe Goldberg.", + "length": 127 + }, + { + "text": "The most expensive property to have been ever sold in Melbourne was a St Georges Road trophy estate, which sold for $24 million in 2010.", + "length": 136 + }, + { + "text": "The sale comes in as the third most expensive private property sale of the year in Victoria, after a year of soaring property prices across the nation.", + "length": 151 + }, + { + "text": "Norman and Tania Smorgon initially wanted $25 million for the luxurious six bedroom abode in 750 Orrong Road Toorak, southeast Melbourne, but after multiple sales campaigns they settled for a speculated $19.", + "length": 207 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5209532380104065 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:05.416398227Z", + "first_section_created": "2025-12-23T09:32:05.416746641Z", + "last_section_published": "2025-12-23T09:32:05.416930248Z", + "all_results_received": "2025-12-23T09:32:05.479454849Z", + "output_generated": "2025-12-23T09:32:05.479610755Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:05.416746641Z", + "publish_time": "2025-12-23T09:32:05.416930248Z", + "first_worker_start": 
"2025-12-23T09:32:05.417483771Z", + "last_worker_end": "2025-12-23T09:32:05.478521Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:05.417530572Z", + "start_time": "2025-12-23T09:32:05.417600875Z", + "end_time": "2025-12-23T09:32:05.417671478Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:05.417723Z", + "start_time": "2025-12-23T09:32:05.417887Z", + "end_time": "2025-12-23T09:32:05.478521Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:05.41747127Z", + "start_time": "2025-12-23T09:32:05.417537073Z", + "end_time": "2025-12-23T09:32:05.417685379Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:05.417418868Z", + "start_time": "2025-12-23T09:32:05.417483771Z", + "end_time": "2025-12-23T09:32:05.417525472Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + 
"worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2761, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/000424152bce9d9f36cb43884dacf16b43052463.json b/data/output/000424152bce9d9f36cb43884dacf16b43052463.json new file mode 100644 index 0000000..9325956 --- /dev/null +++ b/data/output/000424152bce9d9f36cb43884dacf16b43052463.json @@ -0,0 +1,278 @@ +{ + "file_name": "000424152bce9d9f36cb43884dacf16b43052463.txt", + "total_words": 585, + "top_n_words": [ + { + "word": "the", + "count": 25 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "in", + "count": 21 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "people", + "count": 14 + }, + { + "word": "is", + "count": 9 + }, + { + "word": "by", + "count": 8 + }, + { + "word": "said", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Chris Pleasance .", + "length": 17 + }, + { + "text": "07:51 EST, 19 October 2013 .", + "length": 28 + }, + { + "text": "03:25 EST, 19 October 2013 .", + "length": 28 + }, + { + "text": "'Older people deserve the utmost respect and dignity in care.", + "length": 61 + }, + { + "text": "It is about changing attitudes in society, and making time in our busy lives.", + "length": 77 + }, + { + "text": "' Baroness Wilkins, a Labour peer, attacked the suggestion in the House of Lords.", + "length": 81 + }, + { + "text": "It requires people whose presence does not depend on the state of a local authority’s budget.", + "length": 95 + }, 
+ { + "text": "'Loneliness requires human contact, touch and empathy, and the everyday stimulus of news and gossip.", + "length": 100 + }, + { + "text": "'My wife is Chinese and I am struck by the reverence and respect for older people in Asian culture,' he said.", + "length": 109 + }, + { + "text": "Lonely pensioners should be given iPads to provide a 'low-cost link to the outside world', ministers have said.", + "length": 111 + }, + { + "text": "In his speech Mr Hunt said he was struck by the reverence people in Asian countries show to their elderly people.", + "length": 113 + }, + { + "text": "There is no substitute for human contact and the emotional support and companionship of a close friend or family member.", + "length": 120 + }, + { + "text": "The number of people aged 60 or over hit 14million for the first time in 2011 and is expected to rise to 20 million by 2031.", + "length": 124 + }, + { + "text": "Government ministers have said that iPads could be given to elderly people who could use video conferencing and email to stay in touch .", + "length": 136 + }, + { + "text": "'And uncomfortable though it is to say it, it will only start with changes in the way we personally treat our own parents and grandparents.", + "length": 139 + }, + { + "text": "' Health Secretary Jeremy Hunt has said that there are five million people living in the UK whose main source of company is the television .", + "length": 140 + }, + { + "text": "The plans were attacked by Labour peer Baroness Wilkes who said they had been dreamed up by a 25-year-old with no idea what it is like to be old .", + "length": 146 + }, + { + "text": "The advice has emerged just a day after Health Secretary Jeremy Hunt branded the forgotten millions of elderly people in the UK a 'national disgrace'.", + "length": 150 + }, + { + "text": "' A Department of Health spokesperson said: 'Loneliness amongst older people is a serious problem, and one that cannot be tackled using a 'quick fix' solution.", + 
"length": 159 + }, + { + "text": "At the National Children and Adults Services conference yesterday, Mr Hunt revealed that around five million people say television is their main form of company.", + "length": 161 + }, + { + "text": "Hunt added: 'If we are to tackle the challenge of an ageing society, we must learn from this -- and restore and reinvigorate the social contract between generations.", + "length": 165 + }, + { + "text": "' Concerns about social care have been heightened in recent months by a series of scandals of abuse and neglect in care homes, which house about 400,000 people in England.", + "length": 171 + }, + { + "text": "She told The Telegraph: 'This surely was dreamed up by a 25 year-old, totally unaware of the realities of being an 85 year-old woman living on her own in frail health in 2030.", + "length": 175 + }, + { + "text": "But it has now become apparent that in July ministers suggested video conferencing and emails on tablet computers as a way for elderly people to stay in touch with their families.", + "length": 179 + }, + { + "text": "A recent report also sparked alarm about the care local authorities were providing to people in their own homes, revealing that three-quarters allowed carers to spend just 15 minutes with each of their patients.", + "length": 211 + }, + { + "text": "In a response to a House of Lords inquiry the Government said: 'Systems are now available which use lightweight touchscreen tablets enabled with wi-fi and broadband to allow users to send and receive email and text messages, share photos and video conference with family, friends, care providers and health professionals via a universal messaging system.", + "length": 354 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6925534009933472 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:05.918091389Z", + "first_section_created": "2025-12-23T09:32:05.918557908Z", + "last_section_published": 
"2025-12-23T09:32:05.918761616Z", + "all_results_received": "2025-12-23T09:32:05.987953983Z", + "output_generated": "2025-12-23T09:32:05.988110189Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:05.918557908Z", + "publish_time": "2025-12-23T09:32:05.918761616Z", + "first_worker_start": "2025-12-23T09:32:05.919412042Z", + "last_worker_end": "2025-12-23T09:32:05.98705Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:05.919372641Z", + "start_time": "2025-12-23T09:32:05.919451044Z", + "end_time": "2025-12-23T09:32:05.919535047Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:05.919627Z", + "start_time": "2025-12-23T09:32:05.919777Z", + "end_time": "2025-12-23T09:32:05.98705Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:05.91935234Z", + "start_time": "2025-12-23T09:32:05.919412042Z", + "end_time": "2025-12-23T09:32:05.919559348Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:05.91935164Z", + "start_time": "2025-12-23T09:32:05.919434143Z", + "end_time": "2025-12-23T09:32:05.919487445Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 
67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3404, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/00042f853485865198728f8d1e1c61c2d5a248e1.json b/data/output/00042f853485865198728f8d1e1c61c2d5a248e1.json new file mode 100644 index 0000000..a6c1d58 --- /dev/null +++ b/data/output/00042f853485865198728f8d1e1c61c2d5a248e1.json @@ -0,0 +1,230 @@ +{ + "file_name": "00042f853485865198728f8d1e1c61c2d5a248e1.txt", + "total_words": 448, + "top_n_words": [ + { + "word": "s", + "count": 23 + }, + { + "word": "the", + "count": 17 + }, + { + "word": "in", + "count": 12 + }, + { + "word": "from", + "count": 11 + }, + { + "word": "he", + "count": 11 + }, + { + "word": "mcgregor", + "count": 10 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "his", + "count": 7 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "hull", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "'He's been inspirational since day one ...", + "length": 42 + }, + { + "text": "Allan really studies up on all that stuff and he's clearly doing it well, as the stats prove.", + "length": 93 + }, + { + "text": "David Meyler says his keeper has been an inspiration since his move last season from Besiktas .", + "length": 95 + }, + { + "text": "Hull goalkeeper Allan 
McGregor starred in their 0-0 draw with West Brom after saving a penalty .", + "length": 96 + }, + { + "text": "Eden Hazard is deadly from 12 yards, having scored 17 from 17 attempts in domestic competitions .", + "length": 97 + }, + { + "text": "'They all talk through penalties and go through the information about whatever side we're playing.", + "length": 98 + }, + { + "text": "Dorrans reacts after McGregor was up to the task to save a draw for Steve Bruce's 18th-placed side .", + "length": 100 + }, + { + "text": "Graham Dorrans couldn't beat McGregor, who has now saved all three Premier League penalties he's faced .", + "length": 104 + }, + { + "text": "'He's a fantastic keeper,' Meyler told The Sun's Graeme Bryce of McGregor, who joined Hull from Besiktas.", + "length": 105 + }, + { + "text": "Hazard has been flawless in leagues for Lille and Chelsea, but is yet to take a penalty against Hull's McGregor .", + "length": 113 + }, + { + "text": "He works really hard on penalties with our other goalies Steve Harper and Eldin Jakupovic, as well as Gary Walsh our goalkeeping coach.", + "length": 135 + }, + { + "text": "It was was not so long ago that McGregor was the subject of Republic of Ireland international Meyler's abuse from the Celtic stands while he was in goal for Rangers.", + "length": 165 + }, + { + "text": "There are few obvious similarities between superstar Chelsea midfielder Eden Hazard and Hull's Scottish keeper Allan McGregor but both share the honour of being penalty perfectionists.", + "length": 184 + }, + { + "text": "The former Rangers keeper has been impenetrable from 12 yards since his move to Hull, saving three penalties starting with Frank Lampard's effort in last season's opener against Chelsea.", + "length": 186 + }, + { + "text": "Since then he's saved a penalty from QPR's Charlie Austin and last week he was hailed Hull's hero as his stop from West Brom's Graham Dorrans helped Steve Bruce's side salvage a scoreless draw.", + "length": 193 
+ }, + { + "text": "While Hazard's record from the dot is impressive, the best in any of Europe's top five leagues, he's faltered in Europe and McGregor's team-mate David Meyler says McGregor could have his number.", + "length": 194 + }, + { + "text": "Meyler's no doubt happy to have the 32-year-old on his side after he famously saved Georgios Samaras's penalty late in the season in 2011 in an undoubted turning point in that year's Scottish title that went the way of Rangers.", + "length": 227 + }, + { + "text": "Hazard is understandably confident from the spot, having scored 17 goals from 17 attempts in domestic competition for Chelsea and his previous club Lille - but David Meyler says McGregor is the man to stop the Belgian should he get the chance on Saturday at Stamford Bridge.", + "length": 274 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.382800430059433 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:06.419735749Z", + "first_section_created": "2025-12-23T09:32:06.421786331Z", + "last_section_published": "2025-12-23T09:32:06.421917037Z", + "all_results_received": "2025-12-23T09:32:06.480685987Z", + "output_generated": "2025-12-23T09:32:06.480848893Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:06.421786331Z", + "publish_time": "2025-12-23T09:32:06.421917037Z", + "first_worker_start": "2025-12-23T09:32:06.422464359Z", + "last_worker_end": "2025-12-23T09:32:06.479785Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:06.422428457Z", + "start_time": "2025-12-23T09:32:06.42249756Z", + "end_time": "2025-12-23T09:32:06.422554262Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + 
"receive_time": "2025-12-23T09:32:06.422564Z", + "start_time": "2025-12-23T09:32:06.422753Z", + "end_time": "2025-12-23T09:32:06.479785Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:06.422404556Z", + "start_time": "2025-12-23T09:32:06.422477259Z", + "end_time": "2025-12-23T09:32:06.422621465Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:06.422411756Z", + "start_time": "2025-12-23T09:32:06.422464359Z", + "end_time": "2025-12-23T09:32:06.42250306Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2519, + "slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/0004306354494f090ee2d7bc5ddbf80b63e80de6.json 
b/data/output/0004306354494f090ee2d7bc5ddbf80b63e80de6.json new file mode 100644 index 0000000..ae4262e --- /dev/null +++ b/data/output/0004306354494f090ee2d7bc5ddbf80b63e80de6.json @@ -0,0 +1,358 @@ +{ + "file_name": "0004306354494f090ee2d7bc5ddbf80b63e80de6.txt", + "total_words": 562, + "top_n_words": [ + { + "word": "the", + "count": 32 + }, + { + "word": "in", + "count": 21 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "n", + "count": 11 + }, + { + "word": "said", + "count": 11 + }, + { + "word": "u", + "count": 11 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "coast", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "N.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "official.", + "length": 9 + }, + { + "text": "Besides the U.", + "length": 14 + }, + { + "text": "Members of the U.", + "length": 17 + }, + { + "text": "We can't extrapolate.", + "length": 21 + }, + { + "text": "A spokeswoman for the U.", + "length": 24 + }, + { + "text": "The latter was sworn in on May 21.", + "length": 34 + }, + { + "text": "Clinton urges Ivory Coast dialogue .", + "length": 36 + }, + { + "text": "We just can't fingerpoint any group.", + "length": 36 + }, + { + "text": "So this is a very complex environment.", + "length": 38 + }, + { + "text": "Office for the Coordination of Humanitarian Affairs.", + "length": 52 + }, + { + "text": "peacekeepers and eight civilians dead, according to 
a U.", + "length": 56 + }, + { + "text": "\" The peacekeepers were on a reconnaissance patrol because U.", + "length": 61 + }, + { + "text": "CNN's Christabelle Fombu and Tom Watkins contributed to this report.", + "length": 68 + }, + { + "text": "\" He added that he understood other peacekeepers remained in danger.", + "length": 68 + }, + { + "text": "Another 35 families crossed the Ivory Coast's southwest border into U.", + "length": 70 + }, + { + "text": "They provide technical, logistical and security support to the government.", + "length": 74 + }, + { + "text": "\"This is an area where you have so many different types of armed people,\" she said.", + "length": 83 + }, + { + "text": "\"People have different aims and different reasons to carry arms and to perpetrate attack.", + "length": 89 + }, + { + "text": "peacekeepers, humanitarian groups reported eight civilians died in violence, said Dourlot.", + "length": 90 + }, + { + "text": "(CNN) -- Thousands on Saturday fled the area in southwestern Ivory Coast where attacks left seven U.", + "length": 100 + }, + { + "text": "Several hundred had arrived by midday Saturday in the town, which is on the edge of Tai National Park.", + "length": 102 + }, + { + "text": "humanitarian affairs office have deployed to Tai to coordinate relief efforts there with local authorities.", + "length": 107 + }, + { + "text": "\" Van den Wildenberg said it was not clear who was responsible for the attack, which occurred mid-afternoon.", + "length": 108 + }, + { + "text": "officials had heard rumors several days earlier of armed men in the area threatening to attack a village, she said.", + "length": 115 + }, + { + "text": "Operation in Cote d'Ivoire and Ivory Coast troops have increased their presence in the area, Dourlot said Saturday.", + "length": 115 + }, + { + "text": "mission in Ivory Coast said Friday's incident was the first attack on peacekeepers since they entered the country in 2004.", + "length": 122 + }, + { + "text": 
"Gbagbo is in custody at the Hague, accused of crimes against humanity during post-election violence that killed thousands.", + "length": 122 + }, + { + "text": "Humanitarian organizations reported Saturday they were expecting about 4,000 people in Tai, said Remi Dourlot, a spokesman for the U.", + "length": 133 + }, + { + "text": "refugee camps in Liberia, and humanitarian groups said hundreds of others had been pushed south by the violence, according to Dourlot.", + "length": 134 + }, + { + "text": "Secretary-General Ban Ki-moon on Friday called on the government of Ivory Coast \"to do its utmost to identify the perpetrators and hold them accountable.", + "length": 153 + }, + { + "text": "\"Even tonight, after the attack, more than 40 peacekeepers remain with the villagers in this remote region to protect them from this armed group,\" Ban said.", + "length": 156 + }, + { + "text": "One attack occurred late Thursday and into Friday near Para Village, not far from the west-central African nation's border with Liberia, according to the United Nations.", + "length": 169 + }, + { + "text": "The movement comes after blue-helmeted peacekeepers -- who were in the area because of threats against civilians -- came under attack, the United Nations said in a statement.", + "length": 174 + }, + { + "text": "According to the United Nations, its peacekeeping force in Ivory Coast as of April 30 included nearly 11,000 uniformed personnel, as well as several hundred international civilian personnel, local staff and volunteers.", + "length": 218 + }, + { + "text": "Sylvie van den Wildenberg, in a telephone interview from her office in Abidjan, said the remaining forces were continuing to protect area residents, \"who are living in a very difficult terrain -- their villages scattered.", + "length": 221 + }, + { + "text": "peacekeepers remained in Ivory Coast after the 2010 presidential election, when the country was thrown into crisis after incumbent President Laurent Gbagbo refused 
to acknowledge defeat to former Prime Minister Alassane Ouattara.", + "length": 229 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8183609247207642 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:06.922661961Z", + "first_section_created": "2025-12-23T09:32:06.923041776Z", + "last_section_published": "2025-12-23T09:32:06.923231184Z", + "all_results_received": "2025-12-23T09:32:06.987846768Z", + "output_generated": "2025-12-23T09:32:06.988005074Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:06.923041776Z", + "publish_time": "2025-12-23T09:32:06.923231184Z", + "first_worker_start": "2025-12-23T09:32:06.923846008Z", + "last_worker_end": "2025-12-23T09:32:06.98695Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:06.923783006Z", + "start_time": "2025-12-23T09:32:06.923858209Z", + "end_time": "2025-12-23T09:32:06.923957913Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:06.924032Z", + "start_time": "2025-12-23T09:32:06.924182Z", + "end_time": "2025-12-23T09:32:06.98695Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:06.923793806Z", + "start_time": "2025-12-23T09:32:06.923968913Z", + "end_time": "2025-12-23T09:32:06.924101218Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:06.923767105Z", + "start_time": "2025-12-23T09:32:06.923846008Z", + "end_time": "2025-12-23T09:32:06.923911611Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": 
{ + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3481, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/000452ad783360e6b7c3cd993efa880328b98622.json b/data/output/000452ad783360e6b7c3cd993efa880328b98622.json new file mode 100644 index 0000000..5c620a0 --- /dev/null +++ b/data/output/000452ad783360e6b7c3cd993efa880328b98622.json @@ -0,0 +1,310 @@ +{ + "file_name": "000452ad783360e6b7c3cd993efa880328b98622.txt", + "total_words": 726, + "top_n_words": [ + { + "word": "the", + "count": 27 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "to", + "count": 19 + }, + { + "word": "on", + "count": 17 + }, + { + "word": "spent", + "count": 15 + }, + { + "word": "000", + "count": 13 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "council", + "count": 12 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "in", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": 
"By .", + "length": 4 + }, + { + "text": "6billion lost to fraud, £1.", + "length": 28 + }, + { + "text": "'Last year we saved taxpayers £14.", + "length": 35 + }, + { + "text": "Matt Chorley, Mailonline Political Editor .", + "length": 43 + }, + { + "text": "4billion on ‘overgenerous’ annual leave.", + "length": 44 + }, + { + "text": "Over three years Cardiff Council spent £28,000 on biscuits.", + "length": 60 + }, + { + "text": "It's time for a war on waste right across the public sector.", + "length": 60 + }, + { + "text": "6billion in higher sickness rates in the public sector and £1.", + "length": 63 + }, + { + "text": "3billion compared to the final year of the last Labour government.", + "length": 66 + }, + { + "text": "Nottingham City Council spent £4,450 on an office Christmas tree .", + "length": 67 + }, + { + "text": "It includes £22billion on overpaying public sector pay and pensions, £20.", + "length": 75 + }, + { + "text": "3billion, compared to Labour's last year in office, but there's so much more to do.", + "length": 83 + }, + { + "text": "South Tyneside Council spent £214,000 trying to unmask a blogger called Mr Monkey.", + "length": 83 + }, + { + "text": "We need to strip out wasteful and unnecessary spending and start living within our means again.", + "length": 95 + }, + { + "text": "The government insists major progress has been made in reining in spending, including saving £14.", + "length": 99 + }, + { + "text": "'Under Labour departments paid different prices for common goods such as paper and printer cartridges.", + "length": 102 + }, + { + "text": "Medway Council spent £4,000 hiring actor Brian Blessed to record an audio guide to play on its buses.", + "length": 102 + }, + { + "text": "‘It would be nothing short of immoral to saddle the next generation with our trillion-pound debt mountain.", + "length": 108 + }, + { + "text": "‘For too long taxpayers' money has been spent with impunity, with little accountability and not enough 
transparency.", + "length": 118 + }, + { + "text": "The Department for International Development used £4million on a scheme to develop an Ethiopian version of the Spice Girls.", + "length": 124 + }, + { + "text": "The Public art gallery in West Bromwich closed last year after failing to attract enough visitors, despite costing some £72million.", + "length": 132 + }, + { + "text": "The Public art gallery in West Bromwich closed last year after failing to attract enough visitors, despite costing some £72million .", + "length": 133 + }, + { + "text": "We are making sure there's real central oversight over spending and have slashed what we pay on consultants, property and advertising.", + "length": 134 + }, + { + "text": "The War on Waste hopes to change that and remind those we trust with our money that we're watching how it is spent very carefully indeed.", + "length": 137 + }, + { + "text": "The group is launching a War on Waste roadshow touring the country to highlight the scale of the challenge of balancing the nation’s books.", + "length": 141 + }, + { + "text": "The Forestry Commission spent £70 buying a bunny costume, the NHS spent £1,000 on a fat suit and one hospital bought steel giraffes for £8,000.", + "length": 146 + }, + { + "text": "The TPA warns that even in an age of austerity, £120billion-a-year is being wasted – which could be used to fund a £4,500 tax cut for every family.", + "length": 151 + }, + { + "text": "’ The roadshow will tour the country over the next few days, including stops at the constituency offices of David Cameron, Ed Miliband and Nick Clegg.", + "length": 152 + }, + { + "text": "Britain’s debts are increasing by almost £4,000 every second, campaigners will warn today as the spotlight falls on a catalogue of public sector waste.", + "length": 154 + }, + { + "text": "Manchester City Council blew £900,000 on hi-tech Christmas lights while Angus Council splashed out £3,000 on transporting and setting up seven Christmas trees.", + 
"length": 161 + }, + { + "text": "Leicester City Council spent £6,000 hiring minor celebrities, including ex-BBC weatherman Des Coleman, X factor finalists MK1 (pictured) and Apprentice reject Melody Hossaini .", + "length": 177 + }, + { + "text": "But its Bumper Book of Waste also reveals details of the bizarre and extravagant spending of public bodies who often argue that cuts mean they are struggling to make ends meet, .", + "length": 178 + }, + { + "text": "TaxPayers' Alliance Chief Executive Jonathan Isaby will say: ‘Far too much taxpayers' money is wasted, keeping taxes high and taking precious resources away from essential services.", + "length": 183 + }, + { + "text": "The Taxpayers Alliance warns public money has been ‘spent with impunity’ by councils, hospitals and Whitehall departments splashing out on celebrities, Christmas trees and costumes.", + "length": 185 + }, + { + "text": "Last year alone it emerged Leicester City Council spent £6,000 hiring minor celebrities, including ex-BBC weatherman Des Coleman, X factor finalists MK1 and Apprentice reject Melody Hossaini.", + "length": 192 + }, + { + "text": "In previous years Nottingham City Council spent £4,450 on an office Christmas tree while Sandwell Council spent £4,000 hiring TV presenter Keith Chegwin for a St George’s Day event in 2009.", + "length": 193 + }, + { + "text": "A spokesman for Cabinet Office minister Francis Maude said: 'As part of our long-term plan to rebalance the economy we are reforming Whitehall, tackling waste and making things more business-like.", + "length": 196 + }, + { + "text": "Medway Council spent £4,000 hiring actor Brian Blessed to record an audio guide to play on its buses while Sandwell Council spent £4,000 hiring TV presenter Keith Chegwin for a St George’s Day event .", + "length": 204 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7570550441741943 + }, + "name_replaced": false, + "performance": { + "file_detection_time": 
"2025-12-23T09:32:07.424005609Z", + "first_section_created": "2025-12-23T09:32:07.424348823Z", + "last_section_published": "2025-12-23T09:32:07.424546931Z", + "all_results_received": "2025-12-23T09:32:07.487724857Z", + "output_generated": "2025-12-23T09:32:07.487919465Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:07.424348823Z", + "publish_time": "2025-12-23T09:32:07.424546931Z", + "first_worker_start": "2025-12-23T09:32:07.425248059Z", + "last_worker_end": "2025-12-23T09:32:07.486755Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:07.425227358Z", + "start_time": "2025-12-23T09:32:07.425313561Z", + "end_time": "2025-12-23T09:32:07.425398865Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:07.425383Z", + "start_time": "2025-12-23T09:32:07.425525Z", + "end_time": "2025-12-23T09:32:07.486755Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:07.425219958Z", + "start_time": "2025-12-23T09:32:07.425342062Z", + "end_time": "2025-12-23T09:32:07.425549471Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:07.425166155Z", + "start_time": "2025-12-23T09:32:07.425248059Z", + "end_time": "2025-12-23T09:32:07.425328362Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 
0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4440, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/00046ba5302d2eb9432fdda424305453b05a9932.json b/data/output/00046ba5302d2eb9432fdda424305453b05a9932.json new file mode 100644 index 0000000..79a2cb4 --- /dev/null +++ b/data/output/00046ba5302d2eb9432fdda424305453b05a9932.json @@ -0,0 +1,294 @@ +{ + "file_name": "00046ba5302d2eb9432fdda424305453b05a9932.txt", + "total_words": 680, + "top_n_words": [ + { + "word": "she", + "count": 22 + }, + { + "word": "in", + "count": 20 + }, + { + "word": "the", + "count": 20 + }, + { + "word": "i", + "count": 19 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "and", + "count": 16 + }, + { + "word": "lucy", + "count": 16 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "for", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Bianca London .", + "length": 15 + }, + { + "text": "I looked like Barbie.", + "length": 21 + }, + { + "text": "They just can't get enough!", + "length": 27 + }, + { + "text": "maybe one day I will 
look into that.", + "length": 36 + }, + { + "text": "'I love dresses that are figure-hugging.", + "length": 40 + }, + { + "text": "' Speaking about plans for the future, Lucy added: 'I .", + "length": 55 + }, + { + "text": "working behind-the-scenes producing and directing, which I love, so .", + "length": 69 + }, + { + "text": "'I also love presenting and would love to have my own show,' she said.", + "length": 70 + }, + { + "text": "'I'm so proud of my book and I'm pretty overwhelmed with how it's done.", + "length": 71 + }, + { + "text": "want to stay in the industry and being on MIC has taught me loads about .", + "length": 73 + }, + { + "text": "'I prefer to buy high street clothes because I usually only wear an item once,' she said.", + "length": 89 + }, + { + "text": "I'm brutally honest, as always, and I think that's why people like me; they appreciate it.", + "length": 90 + }, + { + "text": "New campaign: Lucy Watson has landed herself yet another campaign, this time for Lipsy VIP .", + "length": 92 + }, + { + "text": "I definitely eat what I want and I'm obsessed with my smoothie maker,' she said of her diet.", + "length": 92 + }, + { + "text": "'I really wanted to be in good shape for our trip but filming has been so hectic, it's really hard.", + "length": 99 + }, + { + "text": "In terms of her day-to-day style, Lucy says that she has no time to shop and buys everything online.", + "length": 100 + }, + { + "text": "Sadly, I went for a giant bright pink dress and I had really blonde hair, which was extremely girly.", + "length": 100 + }, + { + "text": "Classy: Lucy says the new range of occasion wear, which she is the new face of, is perfect for your school prom .", + "length": 113 + }, + { + "text": "The 23-year-old has just been unveiled as the new face of Lipsy's AW14 VIP collection, a capsule range of occasionwear.", + "length": 119 + }, + { + "text": "Monochrome madness: Lucy shows off her trim figure in the new campaign, which sees her posing in the 
British countryside .", + "length": 122 + }, + { + "text": "Before she flew out to New York, Lucy was working out with a trainer twice a week to get in shape for her stint in the Big Apple.", + "length": 129 + }, + { + "text": "' Horsing around: Lucy says she's honoured to be apart of the Lipsy team because she's been wearing their dresses since she was 16 .", + "length": 132 + }, + { + "text": "Success story: Since joining Made In Chelsea a few short years ago, Lucy has been snapped up by countless fashion and beauty brands .", + "length": 133 + }, + { + "text": "'I was so happy to do this and the dresses are feminine, perfect for different occasions and perfectly reflect my style,' she told us.", + "length": 134 + }, + { + "text": "Black out: Lucy looks trim in a black jumpsuit as she reveals she's been working out ahead of filming in New York with Made In Chelsea .", + "length": 136 + }, + { + "text": "Working out: Lucy has been working out with a personal trainer a couple of times a week to keep her body in shape for the MIC spin-off show .", + "length": 141 + }, + { + "text": "Stateside: The Cast of Made in Chelsea were spotted filming today in downtown NYC (L-R) Spencer Matthews, Jamie Laing, Lucy Watson and Riley .", + "length": 142 + }, + { + "text": "She may currently be filming for a Made In Chelsea spin-off over in New York but Lucy Watson's latest fashion shoot is quintessentially British.", + "length": 144 + }, + { + "text": "Lipsy is also introducing a refreshed collection of its eyewear range, modelled by glamorous Made in Chelsea star and face of the new Lipsy VIP range, Lucy .", + "length": 157 + }, + { + "text": "She has a stylist who 'buys her stuff that she likes' but she dresses herself and aspires to look like 'cool' Kylie Jenner or Miranda Kerr, who she thinks is 'sophisticated with a feminine edge to her'.", + "length": 202 + }, + { + "text": "Sharing her top tips for prom dressing, which she says the range is perfect for, Lucy said: 'It's 
so important to go for a colour that suits you because it's a moment you'll remember for the rest of your life.", + "length": 209 + }, + { + "text": "Lucy, who can be seen posing in an array of glamorous gowns in the English countryside, told MailOnline that she was honoured to be asked to work with Lipsy because she's been wearing their designs since she was 16.", + "length": 215 + }, + { + "text": "In between filming and unveiling her new jewellery range, Lucy has penned her debut tome, The Dating Game, which she describes as the modern girl’s no-holds-barred guide to 21st century dating, relationships and break-ups.", + "length": 224 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.37765583395957947 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:07.924732132Z", + "first_section_created": "2025-12-23T09:32:07.925095247Z", + "last_section_published": "2025-12-23T09:32:07.925315456Z", + "all_results_received": "2025-12-23T09:32:07.990336856Z", + "output_generated": "2025-12-23T09:32:07.990514263Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:07.925095247Z", + "publish_time": "2025-12-23T09:32:07.925315456Z", + "first_worker_start": "2025-12-23T09:32:07.925844777Z", + "last_worker_end": "2025-12-23T09:32:07.989395Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:07.925803075Z", + "start_time": "2025-12-23T09:32:07.925863878Z", + "end_time": "2025-12-23T09:32:07.925942981Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:07.926015Z", + "start_time": "2025-12-23T09:32:07.926157Z", + "end_time": "2025-12-23T09:32:07.989395Z", + "queue_wait_time_ms": 
0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:07.925854077Z", + "start_time": "2025-12-23T09:32:07.925907579Z", + "end_time": "2025-12-23T09:32:07.926067286Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:07.925774574Z", + "start_time": "2025-12-23T09:32:07.925844777Z", + "end_time": "2025-12-23T09:32:07.925934881Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3626, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/00047931b6dc1201d48dc8568d053e74c67f0d3c.json b/data/output/00047931b6dc1201d48dc8568d053e74c67f0d3c.json new file mode 100644 index 0000000..c34d2b2 --- /dev/null +++ b/data/output/00047931b6dc1201d48dc8568d053e74c67f0d3c.json 
@@ -0,0 +1,290 @@ +{ + "file_name": "00047931b6dc1201d48dc8568d053e74c67f0d3c.txt", + "total_words": 521, + "top_n_words": [ + { + "word": "the", + "count": 21 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "her", + "count": 16 + }, + { + "word": "harris", + "count": 13 + }, + { + "word": "was", + "count": 12 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "she", + "count": 10 + }, + { + "word": "i", + "count": 9 + }, + { + "word": "tickets", + "count": 9 + }, + { + "word": "vicki", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "com.", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "'It blew me away.", + "length": 17 + }, + { + "text": "'It blew me away.", + "length": 17 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "' Scroll down for video .", + "length": 25 + }, + { + "text": "19:59 EST, 31 January 2014 .", + "length": 28 + }, + { + "text": "01:10 EST, 1 February 2014 .", + "length": 28 + }, + { + "text": "' 'What are you talking about?", + "length": 30 + }, + { + "text": "I had tears running down my face.", + "length": 33 + }, + { + "text": "But her generous son this week found the best way to make it up to her.", + "length": 71 + }, + { + "text": "In the heartwarming video posted to YouTube, which has received more than 1.", + "length": 76 + }, + { + "text": "While she was desperate to attend the game, Vicki Harris couldn't afford tickets.", + "length": 81 + }, + { + "text": "Vicki Harris has been a Seahawks fan since 1976 and has taken her son to many games.", + "length": 84 + }, + { + "text": "He snapped up three tickets for himself, his mom and a childhood friend, flights and hotel rooms.", + "length": 97 + }, + { + "text": "The loyal son said he was blown away by his mother's emotional reaction when he handed her the package.", + 
"length": 103 + }, + { + "text": "' Vicki Harris was in shock when she found Super Bowl tickets inside a FedEx box given to her by her son .", + "length": 106 + }, + { + "text": "3 million hits, Vicki Harris initially couldn't believe her luck before she began sobbing, holding her head in her hands.", + "length": 121 + }, + { + "text": "'I'm excited to go see the game, but there was nothing more meaningful than to see my mom's reaction that I've shared in the video.", + "length": 131 + }, + { + "text": "'I knew she was going to cry': Mike Harris cried as he watched his mother Vicki's emotional reaction to receiving Super Bowl tickets .", + "length": 134 + }, + { + "text": "Overcome: Longtime Seahawks fan Vicki Harris was overwhelmed with emotion when her son, Mike Harris, surprised her with Super Bowl tickets .", + "length": 140 + }, + { + "text": "I had tears running down my face': When Mike Harris decided to film his mother Vicki's reaction, he didn't realize how emotional she would be .", + "length": 143 + }, + { + "text": "Mike Harris bought his mom Super Bowl tickets and when they arrived at his friend's house in a FedEx box, he filmed his mother's emotional reaction.", + "length": 148 + }, + { + "text": "But early this month, Mike Harris noticed the price of tickets and accommodation had plunged due to 'cold weather and cross-country travel difficulties'.", + "length": 153 + }, + { + "text": "Emotional: Vicki Harris stared at the Super Bowl tickets in disbelief as her son Mike told her how much the tickets cost and where they would be staying .", + "length": 154 + }, + { + "text": "Thirty years ago, Vicki Harris was forced to miss the championship celebration for her beloved Seattle Seahawks because she was on the brink of giving birth.", + "length": 157 + }, + { + "text": "On Sunday, she lined up with thousands of other fans to give the Seahawks a proper sendoff before they play the Denver Broncos in Super Bowl XLVIII this weekend.", + "length": 161 + }, + { 
+ "text": "'I knew she was going to cry but I didn’t know that there would be the total disbelief that it was happening, just because we talked about it and we hoped to do it,' he said.", + "length": 181 + }, + { + "text": "In 1983, her husband wouldn't let her go to Boeing Field to welcome the Hawks home when the team knocked off the Miami Dolphins in the playoffs because she was pregnant, MyNorthwest.", + "length": 182 + }, + { + "text": "'Thirty years after she skipped their welcome home for my sake, I was able to return the favor in the biggest way I could ever think of,' Mike Harris said, according to Parent Herald.", + "length": 183 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.44043052196502686 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:08.426128883Z", + "first_section_created": "2025-12-23T09:32:08.426498998Z", + "last_section_published": "2025-12-23T09:32:08.426683405Z", + "all_results_received": "2025-12-23T09:32:08.501416493Z", + "output_generated": "2025-12-23T09:32:08.5015771Z", + "total_processing_time_ms": 75, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 74, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:08.426498998Z", + "publish_time": "2025-12-23T09:32:08.426683405Z", + "first_worker_start": "2025-12-23T09:32:08.42730913Z", + "last_worker_end": "2025-12-23T09:32:08.500383Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:08.427268528Z", + "start_time": "2025-12-23T09:32:08.427328631Z", + "end_time": "2025-12-23T09:32:08.427400434Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:08.427414Z", + "start_time": "2025-12-23T09:32:08.427585Z", + "end_time": "2025-12-23T09:32:08.500383Z", + "queue_wait_time_ms": 0, 
+ "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:08.427235727Z", + "start_time": "2025-12-23T09:32:08.42731753Z", + "end_time": "2025-12-23T09:32:08.429128203Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 1 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:08.427241927Z", + "start_time": "2025-12-23T09:32:08.42730913Z", + "end_time": "2025-12-23T09:32:08.427353932Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 72, + "min_processing_ms": 72, + "max_processing_ms": 72, + "avg_processing_ms": 72, + "median_processing_ms": 72, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 1, + "min_processing_ms": 1, + "max_processing_ms": 1, + "avg_processing_ms": 1, + "median_processing_ms": 1, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2865, + "slowest_section_id": 0, + "slowest_section_time_ms": 73 + } +} diff --git a/data/output/0004a01b031ea6b5f60e97a623a45ae151220c11.json b/data/output/0004a01b031ea6b5f60e97a623a45ae151220c11.json new file mode 100644 index 0000000..333bb18 --- /dev/null +++ b/data/output/0004a01b031ea6b5f60e97a623a45ae151220c11.json @@ 
-0,0 +1,274 @@ +{ + "file_name": "0004a01b031ea6b5f60e97a623a45ae151220c11.txt", + "total_words": 458, + "top_n_words": [ + { + "word": "the", + "count": 29 + }, + { + "word": "a", + "count": 20 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "giraffe", + "count": 10 + }, + { + "word": "at", + "count": 8 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "with", + "count": 7 + }, + { + "word": "calf", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Alex Gore .", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "What's in a name?", + "length": 17 + }, + { + "text": "11:27 EST, 26 March 2013 .", + "length": 26 + }, + { + "text": "11:33 EST, 26 March 2013 .", + "length": 26 + }, + { + "text": "It is one of the most endangered giraffe subspecies, with only a few hundred left in the wild.", + "length": 94 + }, + { + "text": "The calf is still to be named and keepers are searching for a lucky visitor who will pick it .", + "length": 94 + }, + { + "text": "It follows the birth of a rare giraffe listed as an endangered subspecies in the United States.", + "length": 95 + }, + { + "text": "Staff had double cause for celebration because the calf is the first born at the off-exhibit centre.", + "length": 100 + }, + { + "text": "A proud giraffe has been standing tall following the arrival of her newborn calf at a zoo in Germany.", + "length": 101 + }, + { + "text": "Shortlist: Zoo keepers want the newborn's name to start with a 'Z', such as Zuli, Zikomo or Zebenjo .", + "length": 101 + }, + { + "text": "Having a giraffe: The offspring measures around six feet tall and was born at Dortmund Zoo this week .", + "length": 102 + }, + { + "text": "Growth spurt: Giraffe calves weigh around 15 stone and nearly double 
in height in their first year alone .", + "length": 106 + }, + { + "text": "Its mother Gambela has been lavishing the newborn with plenty of attention inside their enclosure ever since.", + "length": 109 + }, + { + "text": "Newborn: A proud giraffe has been standing tall following the arrival of her newborn calf at a zoo in Germany .", + "length": 111 + }, + { + "text": "Rare: A giraffe listed as an endangered subspecies has also been born at a conservation centre in Connecticut .", + "length": 111 + }, + { + "text": "The subspecies is also known as the Baringo giraffe, after the Lake Baringo area of Kenya, or the Ugandan giraffe.", + "length": 114 + }, + { + "text": "Petal, a six-year-old Rothschild giraffe, gave birth to a female calf on Friday at a conservation centre in Connecticut.", + "length": 120 + }, + { + "text": "The gestation period for giraffes lasts around 15 months and calves spend up to 12 weeks reliant on their mother's milk.", + "length": 120 + }, + { + "text": "They weigh around 15 stone at birth and grow very quickly, with most calves nearly doubling in height in the first year alone.", + "length": 126 + }, + { + "text": "Zoo staff are insisting, however, that its name will start with a 'Z' and have placed Zuli, Zikomo and Zebenjo on the shortlist.", + "length": 128 + }, + { + "text": "Founder and director of the LEO Zoological Conservation Center, Marcella Leone, watched the birth alongside staff and a group of other giraffes.", + "length": 144 + }, + { + "text": "When fully grown, the newborn, who will mingle with a group of five giraffes, which includes two pregnant giraffes, could reach 18 feet in height.", + "length": 146 + }, + { + "text": "Rothschild giraffes were named and described by Lord Walter Rothschild, a British zoologist, after an expedition to East Africa in the early 1900s.", + "length": 147 + }, + { + "text": "But the calf is still to be named, with keepers searching for a lucky visitor to become its godfather or godmother and 
have the honour of picking it.", + "length": 149 + }, + { + "text": "The offspring measures around six feet tall and was born at Dortmund Zoo this week to the delight of its mother, her keepers and members of the public.", + "length": 151 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4375886619091034 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:08.927518433Z", + "first_section_created": "2025-12-23T09:32:08.927922649Z", + "last_section_published": "2025-12-23T09:32:08.928119457Z", + "all_results_received": "2025-12-23T09:32:08.995772662Z", + "output_generated": "2025-12-23T09:32:08.995917968Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:08.927922649Z", + "publish_time": "2025-12-23T09:32:08.928119457Z", + "first_worker_start": "2025-12-23T09:32:08.928790184Z", + "last_worker_end": "2025-12-23T09:32:08.994637Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:08.928791484Z", + "start_time": "2025-12-23T09:32:08.928857986Z", + "end_time": "2025-12-23T09:32:08.928913988Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:08.928969Z", + "start_time": "2025-12-23T09:32:08.929106Z", + "end_time": "2025-12-23T09:32:08.994637Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:08.928751982Z", + "start_time": "2025-12-23T09:32:08.928807384Z", + "end_time": "2025-12-23T09:32:08.928913588Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:08.928722181Z", + "start_time": 
"2025-12-23T09:32:08.928790184Z", + "end_time": "2025-12-23T09:32:08.928827585Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2606, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/00050e5dbf398963ad40cb6ff81c918c932cc809.json b/data/output/00050e5dbf398963ad40cb6ff81c918c932cc809.json new file mode 100644 index 0000000..902b5c2 --- /dev/null +++ b/data/output/00050e5dbf398963ad40cb6ff81c918c932cc809.json @@ -0,0 +1,444 @@ +{ + "file_name": "00050e5dbf398963ad40cb6ff81c918c932cc809.txt", + "total_words": 936, + "top_n_words": [ + { + "word": "the", + "count": 43 + }, + { + "word": "he", + "count": 24 + }, + { + "word": "a", + "count": 22 + }, + { + "word": "his", + "count": 22 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "was", + "count": 17 
+ }, + { + "word": "s", + "count": 15 + }, + { + "word": "i", + "count": 14 + }, + { + "word": "of", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "' Edd wrote.", + "length": 12 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Mike Larkin .", + "length": 13 + }, + { + "text": "'I made a mistake.", + "length": 18 + }, + { + "text": "18:03 EST, 16 July 2012 .", + "length": 25 + }, + { + "text": "02:40 EST, 17 July 2012 .", + "length": 25 + }, + { + "text": "I want to recant my words.", + "length": 26 + }, + { + "text": "Her children are her life.", + "length": 26 + }, + { + "text": "She’s totally devastated.", + "length": 27 + }, + { + "text": "You people make me f****** sick.", + "length": 32 + }, + { + "text": "I never meant to insult my uncle.", + "length": 33 + }, + { + "text": "'I took the post down minutes later.", + "length": 36 + }, + { + "text": "Edd said: 'Jennifer Flavin, you won.", + "length": 36 + }, + { + "text": "'What did he do wrong, say happy birthday?", + "length": 42 + }, + { + "text": "overdose, or that his body just packed up.", + "length": 42 + }, + { + "text": "Lets see how much he needs you after this.", + "length": 42 + }, + { + "text": "A source said: 'Sasha has been inconsolable.", + "length": 44 + }, + { + "text": "It follows reports that Los Angeles Police .", + "length": 44 + }, + { + "text": "You tore this family apart and got your trophy, Congrats!", + "length": 57 + }, + { + "text": "Edd Filiti blasted the Rambo star via his Facebook page .", + "length": 57 + }, + { + "text": "This agonizing loss will be felt for the rest of our lives.", + "length": 59 + }, + { + "text": "He added: 'When a parent loses a child there is no greater pain.", + "length": 64 + }, + { + "text": "Department Robbery and Homicide division has taken over the case.", + "length": 65 + }, + { + "text": 
"It is a very emotional time and I vented my feelings,' he explained.", + "length": 68 + }, + { + "text": "Blamed Sylvester's wife Jennifer Flavin for tearing the family apart .", + "length": 70 + }, + { + "text": "I have my own issues with him, but that should never be said on Facebook.", + "length": 73 + }, + { + "text": "Gone: The offending messages have since been removed from his Facebook page .", + "length": 77 + }, + { + "text": "'Like you told my family, \"I'm Sly's family now, he doesn't [need] you anymore.", + "length": 79 + }, + { + "text": "Sage's mother Sasha Czack,split with Stallone in 1985 after 11 years of marriage.", + "length": 81 + }, + { + "text": "According to reports she got a $12 million divorce settlement from the filmmaker.", + "length": 81 + }, + { + "text": "'I was feeling raw and emotional and I never meant for my feeling to become public.", + "length": 83 + }, + { + "text": "Claimed that Sage called the actor on his birthday on July 6 but he never picked up .", + "length": 85 + }, + { + "text": "' Big break: Sylvester helped his son to stardom when he cast him in Rocky V in 1990 .", + "length": 86 + }, + { + "text": "'There was more than one doctor that Sage was seeing in the days leading up to his death.", + "length": 89 + }, + { + "text": "' Tragic: It is believed the aspiring young filmmaker died of an accidental drugs overdose .", + "length": 92 + }, + { + "text": "At work: The devastated actor was promoting The Expendables 2 at Comic Con on Friday night .", + "length": 92 + }, + { + "text": "Edd Filiti made the claims on his Facebook page, though the messages have since been removed.", + "length": 93 + }, + { + "text": "' Tragic: Police removing Sage Stallone's body from his Hollywood Hills home on Friday night .", + "length": 94 + }, + { + "text": "He released a heartrending statement in which he spoke of his pain over his son Sage's sudden death.", + "length": 100 + }, + { + "text": "He was telling me about how he wanted to 
shoot a film, he seemed hopeful for the future and making work plans.", + "length": 110 + }, + { + "text": "Girls and Corpses snapper Mark Berry told RadarOnline: 'I saw nothing to suggest that he was in a suicidal mood.", + "length": 112 + }, + { + "text": "'Neither you [Stallone's wife Jennifer Flavin] or he could return a phone call, which is all he wanted, his father.", + "length": 115 + }, + { + "text": "' An autopsy and a toxicology test were completed Sunday, but the results of the latter will not be released for weeks.", + "length": 119 + }, + { + "text": "'Cops want to establish a timeline of events leading up to Sage's death and they want to know what his medical history was.", + "length": 123 + }, + { + "text": "' While prescription bottles were found at the scene, no suicide not was found and it is believed his death was accidental.", + "length": 123 + }, + { + "text": "' Meanwhile it has emerged officers investigating the death want to talk to several doctors that prescribed him medications.", + "length": 124 + }, + { + "text": "'I know you don't care what happened, but you'll care when you’re squirming under the burning magnifying glass of public opinion.", + "length": 131 + }, + { + "text": "' Edd later decided to remove the comments from his Facebook page, telling the New York Post that he regretted sharing his feelings.", + "length": 132 + }, + { + "text": "When Sage called his father on July 6 to wish him happy birthday, the Hollywood star apparently didn't pick up and didn't call him back.", + "length": 136 + }, + { + "text": "'Sage was our first child and the center of our universe and I am humbly begging for all to have my son's memory and soul left in peace.", + "length": 136 + }, + { + "text": "Meanwhile a photographer who did a feature on Sage in May said he was not surprised about the death, though he said the young filmmaker was far from suicidal.", + "length": 158 + }, + { + "text": "' Edd, who is the son of Stallone's half-sister Toni 
Ann, also went on to make a withering attack to 43-year-old former model, who married the Rocky favourite in 1997.", + "length": 167 + }, + { + "text": "'My impression was that he was a very talented character who seemed under the influence of a pharmaceutical medication, he wasn’t a wreck but he was just a little spaced out.", + "length": 176 + }, + { + "text": "Accused: Sylvester Stallone's nephew has slammed the actor and his wife Jennifer Flavin for neglecting his son Sage, seen here attending the Daylight premiere together in 1996 .", + "length": 177 + }, + { + "text": "In a posting where the 18-year-old slammed the 66-year-old Rambo star's current wife Jennifer Flavin, he also said the action man had cut Sage out of his life, devastating the aspiring filmmaker.", + "length": 195 + }, + { + "text": "A source told RadarOnline: 'Detectives from the LAPD are in the process of going through evidence recovered from Sage's house and will be contacting the doctors that prescribed the medications found there.", + "length": 205 + }, + { + "text": "But Sylvester Stallone's nephew has blasted the actor, accusing him of being to blame as he continually neglected the 36-year-old who was found dead at his Hollywood Hills home on Friday from an apparent overdose.", + "length": 213 + }, + { + "text": "On Sunday night, Sylvester issued a statement asking 'people to respect my talented son's memory and feel compassion for his loving mother Sasha,' and for the media to stop 'the speculation and questionable reporting.", + "length": 217 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7877525985240936 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:09.428886482Z", + "first_section_created": "2025-12-23T09:32:09.430680454Z", + "last_section_published": "2025-12-23T09:32:09.430997166Z", + "all_results_received": "2025-12-23T09:32:09.518280557Z", + "output_generated": "2025-12-23T09:32:09.518505066Z", + 
"total_processing_time_ms": 89, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 87, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:09.430680454Z", + "publish_time": "2025-12-23T09:32:09.430901362Z", + "first_worker_start": "2025-12-23T09:32:09.431447184Z", + "last_worker_end": "2025-12-23T09:32:09.517353Z", + "total_journey_time_ms": 86, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:09.431499186Z", + "start_time": "2025-12-23T09:32:09.431579689Z", + "end_time": "2025-12-23T09:32:09.431673393Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:09.431912Z", + "start_time": "2025-12-23T09:32:09.432109Z", + "end_time": "2025-12-23T09:32:09.517353Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 85 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:09.431542488Z", + "start_time": "2025-12-23T09:32:09.431644792Z", + "end_time": "2025-12-23T09:32:09.4318466Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:09.43134928Z", + "start_time": "2025-12-23T09:32:09.431447184Z", + "end_time": "2025-12-23T09:32:09.431556589Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:09.430952964Z", + "publish_time": "2025-12-23T09:32:09.430997166Z", + "first_worker_start": "2025-12-23T09:32:09.431650192Z", + "last_worker_end": "2025-12-23T09:32:09.486413Z", + "total_journey_time_ms": 55, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:09.431741296Z", + "start_time": "2025-12-23T09:32:09.431790498Z", + "end_time": "2025-12-23T09:32:09.431799398Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:09.431961Z", + "start_time": "2025-12-23T09:32:09.432124Z", + "end_time": "2025-12-23T09:32:09.486413Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 54 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:09.431671193Z", + "start_time": "2025-12-23T09:32:09.431701894Z", + "end_time": "2025-12-23T09:32:09.431719195Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:09.431608091Z", + "start_time": "2025-12-23T09:32:09.431650192Z", + "end_time": "2025-12-23T09:32:09.431657093Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 139, + "min_processing_ms": 54, + "max_processing_ms": 85, + "avg_processing_ms": 69, + "median_processing_ms": 85, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2612, + "slowest_section_id": 0, + "slowest_section_time_ms": 86 + } +} diff --git 
a/data/output/000513feba4745611532547b50df128c5ea564e4.json b/data/output/000513feba4745611532547b50df128c5ea564e4.json new file mode 100644 index 0000000..37bb84b --- /dev/null +++ b/data/output/000513feba4745611532547b50df128c5ea564e4.json @@ -0,0 +1,254 @@ +{ + "file_name": "000513feba4745611532547b50df128c5ea564e4.txt", + "total_words": 544, + "top_n_words": [ + { + "word": "the", + "count": 25 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "to", + "count": 18 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "that", + "count": 11 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "for", + "count": 10 + }, + { + "word": "french", + "count": 9 + }, + { + "word": "he", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "' 'This is not a war.", + "length": 21 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "There is no military solution.", + "length": 30 + }, + { + "text": "'It really hurts our hearts,' he added.", + "length": 39 + }, + { + "text": "So we should have been prepared for this.", + "length": 41 + }, + { + "text": "'The National Guard is called in when policing has failed, he wrote.", + "length": 68 + }, + { + "text": "'Military presence in my city will mark a historic failure on the part of (government).", + "length": 87 + }, + { + "text": "'Frankly, the thing we haven't seen is a lot of government accountability,' French added.", + "length": 89 + }, + { + "text": "'It's impossible for this community to move forward with him still in that role,' he said.", + "length": 90 + }, + { + "text": "At the time, French took to Twitter to express his frustration over Nixon's state of emergency declaration.", + "length": 107 + }, + { + "text": "'Not many people have taken responsibility for what's happened, and people are still waiting for answers and change.", + "length": 116 + }, + { + "text": "By Tuesday, Missouri Governor Jay Nixon had sent more than 
2,200 National Guard members to the Ferguson area to support local law enforcement.", + "length": 142 + }, + { + "text": "'We've gone through a tough week here, but we knew that a lot of people were very angry and we knew a lot of people were coming from other places.", + "length": 146 + }, + { + "text": "Alderman Antonio French said he does not believe Wilson's story of what happened in Ferguson that night in August when Michael Brown was shot dead .", + "length": 148 + }, + { + "text": "' French had his office torched during the first night of unrest sparked by the grand jury decision not to indict Wilson for the killing of Michael Brown.", + "length": 154 + }, + { + "text": "' Antonio French's office was among the buildings torched in a night of violence in Ferguson following the grand jury decision not to indict Darren Wilson .", + "length": 156 + }, + { + "text": "But French believes that many more resignations are needed in order for the community to begin healing, and has called for police chief Thomas Jackson to stand down.", + "length": 165 + }, + { + "text": "He went on to say that the community has not been given the closure or resolve it needs to move forward, and instead the grand jury process has just created more anger.", + "length": 168 + }, + { + "text": "French said former Ferguson police officer Darren Wilson (left) was 'remorseless, cold' in his account of how he shot and killed unarmed 18-year-old Michael Brown (right) in August .", + "length": 182 + }, + { + "text": "Former Ferguson police officer Darren Wilson was 'remorseless, cold,' in his account of how he shot and killed unarmed 18-year-old Michael Brown in August, a St Louis politician has claimed.", + "length": 190 + }, + { + "text": "Alderman Antonio French said he does not believe Wilson's story of what happened in Ferguson that night, telling ABC News that 'frankly a lot of his answers sounded like they were prepared by a lawyer'.", + "length": 203 + }, + { + "text": "On Sunday, 
Darren Wilson's lawyer revealed that the police officer had chosen to resign without severance pay after being made aware of threats that his fellow officers would be harmed if he stayed in the force.", + "length": 211 + }, + { + "text": "Within 30 minutes of the decision being announced that Monday night, Ferguson had erupted in an orgy of violence, arson, random gunfire and pitched battles, with heavily armed riot police unable to cope with the scale of the mayhem.", + "length": 232 + }, + { + "text": "' French told ABC that for the county prosecutor to release the grand jury decision at night and for the governor to call in the National Guard but not deploy them to West Florissant, 'it really showed a failure to grasp the situation and to handle it on the part of government'.", + "length": 279 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7839189767837524 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:09.931767691Z", + "first_section_created": "2025-12-23T09:32:09.932152407Z", + "last_section_published": "2025-12-23T09:32:09.932359715Z", + "all_results_received": "2025-12-23T09:32:09.993439858Z", + "output_generated": "2025-12-23T09:32:09.993584163Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:09.932152407Z", + "publish_time": "2025-12-23T09:32:09.932359715Z", + "first_worker_start": "2025-12-23T09:32:09.93299424Z", + "last_worker_end": "2025-12-23T09:32:09.99247Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:09.933059543Z", + "start_time": "2025-12-23T09:32:09.933123946Z", + "end_time": "2025-12-23T09:32:09.933190548Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + 
"receive_time": "2025-12-23T09:32:09.933201Z", + "start_time": "2025-12-23T09:32:09.933344Z", + "end_time": "2025-12-23T09:32:09.99247Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:09.933067343Z", + "start_time": "2025-12-23T09:32:09.933146847Z", + "end_time": "2025-12-23T09:32:09.933313153Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:09.932928938Z", + "start_time": "2025-12-23T09:32:09.93299424Z", + "end_time": "2025-12-23T09:32:09.933046343Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3106, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/0005aa80b29d3e89e1f77bb781af9d195374e071.json 
b/data/output/0005aa80b29d3e89e1f77bb781af9d195374e071.json new file mode 100644 index 0000000..a3a4ef4 --- /dev/null +++ b/data/output/0005aa80b29d3e89e1f77bb781af9d195374e071.json @@ -0,0 +1,234 @@ +{ + "file_name": "0005aa80b29d3e89e1f77bb781af9d195374e071.txt", + "total_words": 462, + "top_n_words": [ + { + "word": "the", + "count": 16 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "her", + "count": 12 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "with", + "count": 10 + }, + { + "word": "at", + "count": 8 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "is", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "The couple are expecting a baby .", + "length": 33 + }, + { + "text": "It will be the first of many roles for her.", + "length": 43 + }, + { + "text": "Her first film, Tulip Fever, is due out later this year.", + "length": 56 + }, + { + "text": "Miss Bonas plays Mrs Steen, the young wife of a merchant.", + "length": 57 + }, + { + "text": "He said: ‘Cressida is great and people will see what a fantastic actress she is.", + "length": 82 + }, + { + "text": "Cressida carried a small Mulberry clutch bag with her white dress as she posed on the red carpet .", + "length": 98 + }, + { + "text": "Cressida Bonas dazzled in a white Mulberry dress at the BAFTA tea party in Los Angeles last night .", + "length": 99 + }, + { + "text": "It is being produced by influential movie mogul Harvey Weinstein, who has been singing the aspiring actress’s praises.", + "length": 120 + }, + { + "text": "’ He praised her for not trying to use her fame to secure roles, saying: ‘She is not taking advantage of any media profile.", + "length": 127 + }, + { + "text": "Miss Bonas was first spotted with Fox, star of recent film Riot Club, at a West End screening of Tim Burton’s movie Big Eyes.", + "length": 127 + 
}, + { + "text": "She is yet to make her screen debut, but Cressida Bonas already appears to be completely at home at the glitzy Hollywood parties of the awards season.", + "length": 150 + }, + { + "text": "Earlier she dazzled in white at a BAFTA Tea Party, joining Cara Delevingne, Keira Knightley and the ladies of hit TV show Downton Abbey as they partied in the sunshine town.", + "length": 173 + }, + { + "text": "Prince Harry’s former girlfriend, 25, was seen mixing with the movie world’s A-list as part of the British contingent in Los Angeles ahead of the Golden Globes last night.", + "length": 175 + }, + { + "text": "Cressida chatted to director Michael Howells (left) and Downton star Joanne Froggatt (right) Gillian Anderson and Rosamund Pike (left) and Keira Knightley with her musician husband James Righton.", + "length": 196 + }, + { + "text": "’ Tulip Fever, adapted by Sir Tom Stoppard from the novel by Deborah Moggach, is set in the 17th century and sees an artist fall in love with a young woman after her husband commissions him to paint her portrait.", + "length": 214 + }, + { + "text": "Miss Bonas, who dated Prince Harry for two years until last spring, stars alongside model Cara Delevingne as well as Oscar-winners Dame Judi Dench and Christoph Waltz in Tulip Fever, but Weinstein has singled her out as one to watch.", + "length": 233 + }, + { + "text": "And they were together in Los Angeles at celebrity hot-spot Chateau Marmont on Saturday, posing arm in arm at a pre-Golden Globes dinner, where Miss Bonas was also pictured with Downton Abbey stars Joanne Froggatt and Laura Carmichael.", + "length": 235 + }, + { + "text": "Wowing LA: Cressida with Downton stars Joanne Froggatt and Laura Carmichael at a pre-Golden Globes party (left) and with rumoured new flame Freddie Fox (right) But she seems to have more than her career on her mind, with reports that she has begun a relationship with actor Freddie Fox, 26, son of Day Of The Jackal star Edward Fox and 
actress Joanna David.", + "length": 358 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4703695476055145 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:10.433135141Z", + "first_section_created": "2025-12-23T09:32:10.434781806Z", + "last_section_published": "2025-12-23T09:32:10.434915112Z", + "all_results_received": "2025-12-23T09:32:10.499369189Z", + "output_generated": "2025-12-23T09:32:10.499558997Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:10.434781806Z", + "publish_time": "2025-12-23T09:32:10.434915112Z", + "first_worker_start": "2025-12-23T09:32:10.43561274Z", + "last_worker_end": "2025-12-23T09:32:10.498358Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:10.43562884Z", + "start_time": "2025-12-23T09:32:10.435680242Z", + "end_time": "2025-12-23T09:32:10.435725444Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:10.435914Z", + "start_time": "2025-12-23T09:32:10.436042Z", + "end_time": "2025-12-23T09:32:10.498358Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:10.43561244Z", + "start_time": "2025-12-23T09:32:10.435663142Z", + "end_time": "2025-12-23T09:32:10.435792247Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:10.435544137Z", + "start_time": "2025-12-23T09:32:10.43561274Z", + "end_time": "2025-12-23T09:32:10.435672642Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": 
"sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2615, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/0005c9808410ba585973fdf2eaa0d1bb9070546e.json b/data/output/0005c9808410ba585973fdf2eaa0d1bb9070546e.json new file mode 100644 index 0000000..c12e265 --- /dev/null +++ b/data/output/0005c9808410ba585973fdf2eaa0d1bb9070546e.json @@ -0,0 +1,262 @@ +{ + "file_name": "0005c9808410ba585973fdf2eaa0d1bb9070546e.txt", + "total_words": 482, + "top_n_words": [ + { + "word": "the", + "count": 33 + }, + { + "word": "a", + "count": 22 + }, + { + "word": "bird", + "count": 13 + }, + { + "word": "on", + "count": 13 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "it", + "count": 9 + }, + { + "word": "police", + "count": 8 + }, + { + "word": "with", + "count": 8 + }, + { + "word": "club", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 
+ }, + { + "text": "'I definitely didn't do it on the car.", + "length": 38 + }, + { + "text": "I didn't intend to disrespect anybody'.", + "length": 39 + }, + { + "text": "Greg Bird in action for the Gold Coast.", + "length": 39 + }, + { + "text": "Bird is a star player with the Gold Coast Titans.", + "length": 49 + }, + { + "text": "Greg Bird's wife Beccy Ruchow pictured before the Dally M Awards .", + "length": 66 + }, + { + "text": "His public statement followed a private meeting with club officials.", + "length": 68 + }, + { + "text": "'This is very embarrassing from the club's perspective, it's unacceptable.", + "length": 74 + }, + { + "text": "He has apologised for his actions saying 'I know it was a stupid thing to do ..", + "length": 79 + }, + { + "text": "The Titans have taken a statement from Bird and the club has been talking with the NRL.", + "length": 87 + }, + { + "text": "' Bird admitted he did know it was a police car but insists he didn't do 'it on the car'.", + "length": 89 + }, + { + "text": "Greg Bird pictured (front facing camera) on social media with his friends in a pre-wedding celebration .", + "length": 104 + }, + { + "text": "Bird, 30, an Australian international, married fiancée Beccy Rochow in the coastal town at the weekend.", + "length": 104 + }, + { + "text": "We'll continue to talk to the NRL and hold a club disciplinary meeting this week,' a club spokesman said.", + "length": 105 + }, + { + "text": "I'd like to apologise to the club and its fans, anyone I may have offended, especially my to my wife Beccy.", + "length": 107 + }, + { + "text": "This has put a dampener on our wedding weekend and I'm incredibly embarrassed and disappointed with myself.", + "length": 107 + }, + { + "text": "The NRL club has confirmed it will hold a disciplinary meeting later this week after discussions with the NRL .", + "length": 111 + }, + { + "text": "It's also been claimed that a number of witnesses allegedly looked on as the incident occurred, a 
night after Bird's wedding.", + "length": 125 + }, + { + "text": "NSW Police reported that Bird left a pub on Bay St at about 7:45pm on Sunday and allegedly urinated on the fully-marked vehicle.", + "length": 128 + }, + { + "text": "The Gold Coast Titans player faced the allegations of offensive behaviour after an incident in the popular holiday spot, Byron Bay.", + "length": 131 + }, + { + "text": "Greg Bird has been issued a criminal infringement notice according to police after an alleged incident on the NSW coastal town of Byron Bay .", + "length": 141 + }, + { + "text": "'They said they had received a statement from somebody and I received a ticket from the police and will be paying it in the coming days,' Bird said.", + "length": 148 + }, + { + "text": "He was honeymooning on the NSW far north coast when the incident - in which he's alleged to have urinated on a police car - took place a day after his wedding .", + "length": 160 + }, + { + "text": "Rugby league star Greg Bird has been slapped with a $200 criminal infringement notice for allegedly urinating on a police vehicle, just a day after his wedding.", + "length": 160 + }, + { + "text": "A police spokeswoman confirmed that the footballer had been at the Byron Bay police station on Monday morning, where he was subsequently issued with the infringement.", + "length": 166 + }, + { + "text": "Bird is considered one of the game's stars, was in the victorious NSW State of Origin team, and was part of the Australian team which recently made it to the final of the Four Nations tournament against New Zealand.", + "length": 215 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.86302649974823 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:10.935189145Z", + "first_section_created": "2025-12-23T09:32:10.935496458Z", + "last_section_published": "2025-12-23T09:32:10.935680865Z", + "all_results_received": "2025-12-23T09:32:10.997434383Z", + 
"output_generated": "2025-12-23T09:32:10.997619891Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:10.935496458Z", + "publish_time": "2025-12-23T09:32:10.935680865Z", + "first_worker_start": "2025-12-23T09:32:10.93628149Z", + "last_worker_end": "2025-12-23T09:32:10.996511Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:10.936307791Z", + "start_time": "2025-12-23T09:32:10.936393094Z", + "end_time": "2025-12-23T09:32:10.936459997Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:10.936432Z", + "start_time": "2025-12-23T09:32:10.936552Z", + "end_time": "2025-12-23T09:32:10.996511Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:10.936225587Z", + "start_time": "2025-12-23T09:32:10.93628149Z", + "end_time": "2025-12-23T09:32:10.936403295Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:10.936233788Z", + "start_time": "2025-12-23T09:32:10.93629789Z", + "end_time": "2025-12-23T09:32:10.936347892Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + 
"median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2687, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/0005d61497d21ff37a17751829bd7e3b6e4a7c5c.json b/data/output/0005d61497d21ff37a17751829bd7e3b6e4a7c5c.json new file mode 100644 index 0000000..4812b7b --- /dev/null +++ b/data/output/0005d61497d21ff37a17751829bd7e3b6e4a7c5c.json @@ -0,0 +1,468 @@ +{ + "file_name": "0005d61497d21ff37a17751829bd7e3b6e4a7c5c.txt", + "total_words": 1255, + "top_n_words": [ + { + "word": "the", + "count": 73 + }, + { + "word": "of", + "count": 38 + }, + { + "word": "to", + "count": 31 + }, + { + "word": "a", + "count": 26 + }, + { + "word": "and", + "count": 26 + }, + { + "word": "in", + "count": 23 + }, + { + "word": "that", + "count": 22 + }, + { + "word": "arizona", + "count": 21 + }, + { + "word": "said", + "count": 21 + }, + { + "word": "law", + "count": 19 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "residency.", + "length": 10 + }, + { + "text": "\"What Gov.", + "length": 10 + }, + { + "text": "\" Republican Arizona Gov.", + "length": 25 + }, + { + "text": "About 1 million come from Asia.", + "length": 31 + }, + { + "text": "\"It doesn't do anything on that.", + "length": 32 + }, + { + "text": "Miguel Rivera, the group's chairman.", + "length": 36 + }, + { + "text": "Jan Brewer signed the law last 
week.", + "length": 36 + }, + { + "text": "What kind of country are we becoming?", + "length": 37 + }, + { + "text": "\"This law doesn't accomplish that,\" he said.", + "length": 44 + }, + { + "text": "\" Federal officials estimate there are about 10.", + "length": 48 + }, + { + "text": "At least five other states, including California, with 2.", + "length": 57 + }, + { + "text": "\"It is illegal in America, and it's certainly illegal in Arizona.", + "length": 65 + }, + { + "text": "In Arizona, two popular singers also will voice their opposition.", + "length": 65 + }, + { + "text": "6 million, have more undocumented immigrants, the government says.", + "length": 66 + }, + { + "text": "8 million illegal immigrants in the United States, of which about 6.", + "length": 68 + }, + { + "text": "6 million come from Mexico and 760,000 from the rest of Latin America.", + "length": 70 + }, + { + "text": "Phoenix Mayor Phil Gordon said Thursday that he is \"very disappointed.", + "length": 70 + }, + { + "text": "\" The uproar caused by the law has even spread to the nation's pastime.", + "length": 71 + }, + { + "text": "\"Racial profiling is illegal,\" Brewer said after signing the bill Friday.", + "length": 73 + }, + { + "text": "Some officials in Arizona have expressed their displeasure with the measure.", + "length": 76 + }, + { + "text": "The measure makes it a state crime to live or travel through Arizona illegally.", + "length": 79 + }, + { + "text": "Other critics say the bill is unconstitutional and will trample residents' civil rights.", + "length": 88 + }, + { + "text": "It also targets those who hire illegal immigrant day laborers or knowingly transport them.", + "length": 90 + }, + { + "text": "to try and engage voters, some segment of voters, to show up in this November's elections.", + "length": 90 + }, + { + "text": "Saenz, president of the Mexican American Legal Defense and Educational Fund, known as MALDEF.", + "length": 93 + }, + { + "text": "\" President 
Obama has called on Congress to pass a comprehensive immigration reform law this year.", + "length": 98 + }, + { + "text": "Grammy Award-winning Colombian singer Shakira is scheduled to meet with Gordon on Thursday evening.", + "length": 99 + }, + { + "text": "\" He said he is concerned that calls to boycott Arizona businesses and tourism will harm the state.", + "length": 99 + }, + { + "text": "The other states with more illegal immigrants than Arizona are Texas, Florida, New York and Georgia.", + "length": 100 + }, + { + "text": "It goes into effect 90 days after the close of the legislative session, which has not been determined.", + "length": 102 + }, + { + "text": "\" The National Coalition Of Latino Clergy \u0026 Christian Leaders said Sunday it also planned legal action.", + "length": 103 + }, + { + "text": "\"No one in our country should be required to produce their 'papers' or demand to prove their innocence.", + "length": 103 + }, + { + "text": "Brewer and others who support the law say it does not involve racial profiling or any other illegal acts.", + "length": 105 + }, + { + "text": "\"We must come together and stop SB1070 from pitting neighbor against neighbor to the detriment of us all.", + "length": 105 + }, + { + "text": "\"The rich tradition we all admire, of recognizing immigrants in the United States, has been harmed, undermined.", + "length": 111 + }, + { + "text": "Arizona, which is on the Mexican border, has about 460,000 undocumented immigrants, the federal government says.", + "length": 112 + }, + { + "text": "\"I think it is clearly a result of the federal government's failure to secure our border and to enforce our laws.", + "length": 113 + }, + { + "text": "It is a low point in modern America when a state law requires police to demand documents from people on the street.", + "length": 115 + }, + { + "text": "\"If this law were implemented, citizens would effectively have to carry 'their papers' at all times to avoid arrest.", + "length": 
116 + }, + { + "text": "\" But a national Republican leader said Thursday that Arizona is just filling a void left by the federal government.", + "length": 116 + }, + { + "text": "Fifty-seven percent of those surveyed said they worried that they, a family member or a close friend could be deported.", + "length": 119 + }, + { + "text": "Brewer signed into law last week is a piece of legislation that threatens the very heart of this great state,\" Ronstadt said.", + "length": 125 + }, + { + "text": "\" The Arizona measure has drawn sharp criticism from the Mexican government, which issued an advisory to its citizens this week.", + "length": 128 + }, + { + "text": "Protesters plan to demonstrate against the Arizona Diamondbacks baseball team Thursday outside Wrigley Field in Chicago, Illinois.", + "length": 130 + }, + { + "text": "\"I think the people of Arizona have a right to pass their laws under the 10th Amendment,\" House Minority Leader John Boehner said.", + "length": 130 + }, + { + "text": "The secretary general of the Organization of American States and some member states also expressed concerns about the law Wednesday.", + "length": 132 + }, + { + "text": "The Pew survey also indicated that about one-third of the nation's Latinos say they or someone they know has experienced discrimination.", + "length": 136 + }, + { + "text": "\" Gordon said the real solution is comprehensive immigration reform that would allow more immigrants to legally enter the United States.", + "length": 136 + }, + { + "text": "About 9 percent said they had been stopped by police or other authorities and asked about their immigration status in the year before the survey.", + "length": 145 + }, + { + "text": "CNN has learned that Senate Majority Leader Harry Reid and other top Democratic senators will unveil the outlines of that legislation late Thursday.", + "length": 148 + }, + { + "text": "\"This training will include what does and does not constitute reasonable suspicion that a 
person is not legally present in the United States,\" she said.", + "length": 152 + }, + { + "text": "\"Quite simply, this law is a civil rights disaster and an insult to American values,\" said Mary Bauer, legal director of the Southern Poverty Law Center.", + "length": 153 + }, + { + "text": "Singer-songwriter Linda Ronstadt, an Arizona native of Mexican and German descent, also attended the Thursday afternoon rally with the immigrant rights groups.", + "length": 159 + }, + { + "text": "\"Our churches and pastors in Arizona are outraged about the significant threat this anti-immigrant law will have in the lives of Arizona's Latinos,\" said the Rev.", + "length": 162 + }, + { + "text": "\"I'm very incredulous that our state leaders -- our so-called leaders -- have allowed our state to be split when we're suffering economic hardships,\" Gordon told CNN.", + "length": 166 + }, + { + "text": "(CNN) -- Four groups that advocate for immigrant rights said Thursday they will challenge Arizona's new immigration law, which allows police to ask anyone for proof of legal U.", + "length": 176 + }, + { + "text": "A Pew Research Center survey late last year found that Americans believe Latinos are discriminated against more than any other major racial or ethnic group in American society.", + "length": 176 + }, + { + "text": "But Boehner said at a briefing Thursday that \"there's not a chance\" that Congress will approve the measure this year, especially after the recent passage of a health care reform bill.", + "length": 183 + }, + { + "text": "\"This law will only make the rampant racial profiling of Latinos that is already going on in Arizona much worse,\" said Alessandra Soler Meetze, executive director of the ACLU of Arizona.", + "length": 186 + }, + { + "text": "The law requires immigrants to carry their alien registration documents at all times and requires police to question people if there is reason to suspect they're in the United States illegally.", + "length": 193 + 
}, + { + "text": "In addition to signing the law, Brewer also issued an executive order that requires training for local officers on how to implement the law without engaging in racial profiling or discrimination.", + "length": 195 + }, + { + "text": "\"This policy violates the rights of American citizens, particularly the fast-growing Latino population of Arizona, by eliminating the basic right of due process, which we are certain that the courts will agree,\" Rivera said.", + "length": 224 + }, + { + "text": "The Mexican American Legal Defense and Educational Fund, the American Civil Liberties Union, the ACLU of Arizona and the National Immigration Law Center held a news conference Thursday in Phoenix to announce the legal challenge.", + "length": 228 + }, + { + "text": "\"The Arizona community can be assured that a vigorous and sophisticated legal challenge will be mounted, in advance of SB1070's implementation, seeking to prevent this unconstitutional and discriminatory law from ever taking effect,\" said Thomas A.", + "length": 248 + }, + { + "text": "\"This is an issue of concern to all citizens of the Americas, beginning with the citizens of the United States, a country with a very rich tradition of immigration and respect for immigrants who have come to lead a better life,\" OAS Secretary General Jose Miguel Insulza said.", + "length": 276 + }, + { + "text": "\"I've been out here for a little while and know that in the middle of an election year, after we've had bills like health care shoved down our throats and the process twisted, tortured, pressured, bribed, you cannot do a serious piece of legislation of this size, with this difficulty, in this environment,\" he said.", + "length": 316 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5959261357784271 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:11.436468285Z", + "first_section_created": "2025-12-23T09:32:11.438011848Z", + 
"last_section_published": "2025-12-23T09:32:11.438213456Z", + "all_results_received": "2025-12-23T09:32:11.53419187Z", + "output_generated": "2025-12-23T09:32:11.534491482Z", + "total_processing_time_ms": 98, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 95, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:11.438011848Z", + "publish_time": "2025-12-23T09:32:11.438181455Z", + "first_worker_start": "2025-12-23T09:32:11.438953986Z", + "last_worker_end": "2025-12-23T09:32:11.533171Z", + "total_journey_time_ms": 95, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:11.438981987Z", + "start_time": "2025-12-23T09:32:11.439062891Z", + "end_time": "2025-12-23T09:32:11.439186496Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:11.439199Z", + "start_time": "2025-12-23T09:32:11.439349Z", + "end_time": "2025-12-23T09:32:11.533171Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 93 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:11.438930685Z", + "start_time": "2025-12-23T09:32:11.43904299Z", + "end_time": "2025-12-23T09:32:11.439213297Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:11.438862882Z", + "start_time": "2025-12-23T09:32:11.438953986Z", + "end_time": "2025-12-23T09:32:11.439025789Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:11.438192255Z", + "publish_time": "2025-12-23T09:32:11.438213456Z", + "first_worker_start": "2025-12-23T09:32:11.43904559Z", + "last_worker_end": "2025-12-23T09:32:11.501972Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": 
"2025-12-23T09:32:11.439085392Z", + "start_time": "2025-12-23T09:32:11.439132293Z", + "end_time": "2025-12-23T09:32:11.439178895Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:11.439205Z", + "start_time": "2025-12-23T09:32:11.43933Z", + "end_time": "2025-12-23T09:32:11.501972Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:11.438996288Z", + "start_time": "2025-12-23T09:32:11.43904559Z", + "end_time": "2025-12-23T09:32:11.439128193Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:11.439030089Z", + "start_time": "2025-12-23T09:32:11.439082691Z", + "end_time": "2025-12-23T09:32:11.439187396Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 155, + "min_processing_ms": 62, + "max_processing_ms": 93, + "avg_processing_ms": 77, + "median_processing_ms": 93, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3823, + "slowest_section_id": 0, + "slowest_section_time_ms": 95 + } +} diff --git a/data/output/0006021f772fad0aa78a977ce4a31b3faa6e6fe5.json b/data/output/0006021f772fad0aa78a977ce4a31b3faa6e6fe5.json new file mode 100644 index 0000000..d3098c6 --- /dev/null +++ b/data/output/0006021f772fad0aa78a977ce4a31b3faa6e6fe5.json @@ -0,0 +1,428 @@ +{ + "file_name": "0006021f772fad0aa78a977ce4a31b3faa6e6fe5.txt", + "total_words": 838, + "top_n_words": [ + { + "word": "the", + "count": 34 + }, + { + "word": "a", + "count": 29 + }, + { + "word": "is", + "count": 21 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "and", + "count": 16 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "senate", + "count": 12 + }, + { + "word": "as", + "count": 11 + }, + { + "word": "has", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "Sen.", + "length": 4 + }, + { + "text": "But when Rep.", + "length": 13 + }, + { + "text": "Republican Rep.", + "length": 15 + }, + { + "text": "Key races in 2014 .", + "length": 19 + }, + { + "text": "Senate in Kentucky.", + "length": 19 + }, + { + "text": "Graham has some advantages.", + "length": 27 + }, + { + "text": "Complete midterm coverage .", + "length": 27 + }, + { + "text": "Her father is longtime Sen.", + "length": 27 + }, + { + "text": "Senate, Iowa: When popular Sen.", + "length": 31 + }, + { + "text": "Senate, Louisiana: Democratic Sen.", + "length": 34 + }, + { + "text": "Walker, Burke tied up in new poll .", + "length": 35 + }, + { + "text": "How Mitch McConnell crushed the tea party .", + "length": 43 + }, + { + "text": "The race with the most at stake is the one for U.", + "length": 49 + }, + { + "text": 
"National Democrats go after Cassidy on Medicare .", + "length": 49 + }, + { + "text": "But this race could be a bright spot for Democrats.", + "length": 51 + }, + { + "text": "Here are four other races that are worth watching: .", + "length": 52 + }, + { + "text": "Mary Landrieu to reimburse Senate for charter flight .", + "length": 54 + }, + { + "text": "He also drastically limited workers' bargaining rights.", + "length": 55 + }, + { + "text": "Climate group attacks Ernst on tax pledge, not climate .", + "length": 56 + }, + { + "text": "Economic policy is a central component of this campaign.", + "length": 56 + }, + { + "text": "Bruce Braley jumped in, Democrats' confidence was restored.", + "length": 59 + }, + { + "text": "Most interestingly, a political roller coaster is possible.", + "length": 59 + }, + { + "text": "4 billion is going to be spent on advertising this midterm season.", + "length": 66 + }, + { + "text": "This is one of the most interesting and critical races in the country.", + "length": 70 + }, + { + "text": "Bill Cassidy is the person who is giving Landrieu another difficult run.", + "length": 72 + }, + { + "text": "her access to his connections and deep knowledge of running successful campaigns.", + "length": 81 + }, + { + "text": "So, if you turn on your TV, expect to see more -- and nastier -- political advertisements.", + "length": 90 + }, + { + "text": "Landrieu, meanwhile, is attempting to paint her Senate tenure as a picture of independence.", + "length": 91 + }, + { + "text": "Mary Landrieu always has tough races, and her fourth bid for the Senate seat is no exception.", + "length": 93 + }, + { + "text": "In fact, Elizabeth Wilner, senior vice president of Kantar Ad Intelligence, says as much as $3.", + "length": 95 + }, + { + "text": "Wisconsin governor: The Wisconsin governor's race has many national repercussions, as Republican Gov.", + "length": 101 + }, + { + "text": "Scott Walker is locked in a tight re-election battle against 
former Trek bicycle executive Mary Burke.", + "length": 102 + }, + { + "text": "While Labor Day is the unofficial end of summer, it's also the unofficial start to the campaign season.", + "length": 103 + }, + { + "text": "There's a chance the Louisiana race, and the balance of the Senate, might be dragged out until December.", + "length": 104 + }, + { + "text": "She has no problem getting money -- raising more than Southerland -- and she has a Florida-famous last name.", + "length": 108 + }, + { + "text": "Tom Harkin decided to retire, Democrats had a small panic attack as this solidly Democratic seat was now in play.", + "length": 113 + }, + { + "text": "Cassidy is tying Landrieu to Obama in this conservative-leaning state while painting her as a corrupt Washington insider.", + "length": 121 + }, + { + "text": "That means politicking will be on the rise, especially as control of the Senate is at stake as well as control of 36 state houses.", + "length": 130 + }, + { + "text": "But then that confidence has been shaken as Braley has run a gaffe-prone campaign that involves digs at farmers and meandering chickens.", + "length": 136 + }, + { + "text": "McConnell, a shrewd politician, prolific fundraiser and expert campaigner, has had numerous missteps, making this race even more interesting.", + "length": 141 + }, + { + "text": "If Landrieu or Cassidy doesn't receive more than 50% of the vote on Election Day in November, a winner won't be named until that state's December 6 election.", + "length": 157 + }, + { + "text": "Grimes, meanwhile, has also stumbled when talking about foreign policy, and questions have arisen about a possible sweetheart deal involving her campaign bus.", + "length": 158 + }, + { + "text": "His troubles include a flubbed campaign ad, a campaign manager who was a little too honest, the recent resignation of that manager and a caught-on-tape moment.", + "length": 159 + }, + { + "text": "This is not just one of the only Senate races Republicans 
are at risk of losing, but also the race where the top Senate Republican is at risk of losing his job.", + "length": 160 + }, + { + "text": "Walker has gained prominence in conservative politics for governing as a fiscal conservative and making deep cuts to spending by cutting public union workers' pensions.", + "length": 168 + }, + { + "text": "Democrats and Republicans understand the stakes, and President Barack Obama traveled to Wisconsin on Labor Day to speak at a union event in a trip packed with political symbolism.", + "length": 179 + }, + { + "text": "Florida's 2nd Congressional District: There is little to no chance that Republicans will lose control of the House of Representatives, and some race analyzers say the GOP will even pick up seats.", + "length": 195 + }, + { + "text": "Even in what is expected to be a difficult year for Democrats, Democratic candidate Gwen Graham could pull out a victory in this Republican-leaning district of Tallahassee and the central part of the Florida Panhandle.", + "length": 218 + }, + { + "text": "Meanwhile, Democrats, backed by labor unions, are again fighting to defeat Walker -- they forced a recall two years ago that Walker won -- to move forward on more Democratic economic policies, including lifting the minimum wage.", + "length": 228 + }, + { + "text": "His missteps -- combined with the surprising strength of Republican candidate Joni Ernst, who has run a great campaign that began with a breakout performance in the crowded Republican primary -- make this a possible and unexpected pickup for Republicans.", + "length": 254 + }, + { + "text": "The Republican leader of the Senate, Mitch McConnell, is in a fight for his political life against Alison Lundergan Grimes, a relatively inexperienced Democratic politician who was just 8 years old when McConnell started his first term in the Senate in 1985.", + "length": 258 + }, + { + "text": "Walker, who is also being investigated for alleged illegal campaign coordination with 
outside groups, is considered a potential 2016 Republican presidential candidate, but if he loses his gubernatorial race, his path to the presidency will be very, very narrow.", + "length": 261 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5609220564365387 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:11.938989375Z", + "first_section_created": "2025-12-23T09:32:11.939335089Z", + "last_section_published": "2025-12-23T09:32:11.939700404Z", + "all_results_received": "2025-12-23T09:32:12.008107493Z", + "output_generated": "2025-12-23T09:32:12.008414206Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:11.939335089Z", + "publish_time": "2025-12-23T09:32:11.939617601Z", + "first_worker_start": "2025-12-23T09:32:11.940194624Z", + "last_worker_end": "2025-12-23T09:32:12.007208Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:11.940261127Z", + "start_time": "2025-12-23T09:32:11.940347431Z", + "end_time": "2025-12-23T09:32:11.940468835Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:11.941334Z", + "start_time": "2025-12-23T09:32:11.941536Z", + "end_time": "2025-12-23T09:32:12.007208Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:11.940209925Z", + "start_time": "2025-12-23T09:32:11.940314829Z", + "end_time": "2025-12-23T09:32:11.940538738Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:11.940147122Z", + "start_time": "2025-12-23T09:32:11.940194624Z", + "end_time": 
"2025-12-23T09:32:11.940252927Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:11.939658602Z", + "publish_time": "2025-12-23T09:32:11.939700404Z", + "first_worker_start": "2025-12-23T09:32:11.940173923Z", + "last_worker_end": "2025-12-23T09:32:11.976015Z", + "total_journey_time_ms": 36, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:11.940466135Z", + "start_time": "2025-12-23T09:32:11.940560239Z", + "end_time": "2025-12-23T09:32:11.940566739Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:11.940647Z", + "start_time": "2025-12-23T09:32:11.940779Z", + "end_time": "2025-12-23T09:32:11.976015Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 35 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:11.940385232Z", + "start_time": "2025-12-23T09:32:11.940433334Z", + "end_time": "2025-12-23T09:32:11.940440234Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:11.940123921Z", + "start_time": "2025-12-23T09:32:11.940173923Z", + "end_time": "2025-12-23T09:32:11.940175524Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 100, + "min_processing_ms": 35, + "max_processing_ms": 65, + "avg_processing_ms": 50, + "median_processing_ms": 65, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": 
"topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2563, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/00061f5113a7e6334310f88a12c54d87af483347.json b/data/output/00061f5113a7e6334310f88a12c54d87af483347.json new file mode 100644 index 0000000..db971e3 --- /dev/null +++ b/data/output/00061f5113a7e6334310f88a12c54d87af483347.json @@ -0,0 +1,496 @@ +{ + "file_name": "00061f5113a7e6334310f88a12c54d87af483347.txt", + "total_words": 1034, + "top_n_words": [ + { + "word": "the", + "count": 54 + }, + { + "word": "and", + "count": 31 + }, + { + "word": "a", + "count": 26 + }, + { + "word": "it", + "count": 24 + }, + { + "word": "to", + "count": 24 + }, + { + "word": "of", + "count": 22 + }, + { + "word": "he", + "count": 17 + }, + { + "word": "said", + "count": 17 + }, + { + "word": "mayer", + "count": 15 + }, + { + "word": "octopus", + "count": 15 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Food?", + "length": 5 + }, + { + "text": "place.", + "length": 6 + }, + { + "text": "' The .", + "length": 7 + }, + { + "text": "animal.", + "length": 7 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "It's bad form.", + "length": 14 + }, + { + "text": "'That's not true.", + "length": 17 + }, + { + "text": "'I eat it for meat.", + "length": 19 + }, + { + "text": "'It's just not done.", + "length": 20 + }, + { + 
"text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Watch the video here: .", + "length": 23 + }, + { + "text": "'It's like deer hunting.", + "length": 24 + }, + { + "text": "19:43 EST, 25 November 2012 .", + "length": 29 + }, + { + "text": "01:10 EST, 26 November 2012 .", + "length": 29 + }, + { + "text": "It's no different than fishing.", + "length": 31 + }, + { + "text": "I don't have a problem with it.", + "length": 31 + }, + { + "text": "Their shock turned to horror as a .", + "length": 35 + }, + { + "text": "‘People dive for all sorts of reasons.", + "length": 40 + }, + { + "text": "Angry fellow divers from the shore took .", + "length": 41 + }, + { + "text": "I ignored them and ended up driving away.", + "length": 41 + }, + { + "text": "James Nye, Leslie Larson and Nina Golgowski .", + "length": 45 + }, + { + "text": "'I probably would have gone at a different time.", + "length": 48 + }, + { + "text": "It's a ban Mr Mayer has since latched onto himself.", + "length": 51 + }, + { + "text": "female onto the bed of his truck before driving off.", + "length": 52 + }, + { + "text": "Even if you can do it, you shouldn't do it,’ he said.", + "length": 55 + }, + { + "text": "There were no eggs under it, and we checked,' said Mayer.", + "length": 57 + }, + { + "text": "nothing wrong, Mayer could have acted with more sensitivity.", + "length": 60 + }, + { + "text": "'As they were coming in you could tell the octopus was alive.", + "length": 61 + }, + { + "text": "It could have been done at a better time,' said Wendy Willette.", + "length": 63 + }, + { + "text": "'He wanted me to get something from nature, so I got an octopus.", + "length": 64 + }, + { + "text": "I probably would have gone to another area of Cove Two,' he said.", + "length": 65 + }, + { + "text": "game warden who inspected the catch also said that despite doing .", + "length": 66 + }, + { + "text": "It's just a different animal,' said Mayer at the time to Komo News.", + "length": 67 + 
}, + { + "text": "onto his Facebook account and now is in the position of having to .", + "length": 67 + }, + { + "text": "I caught it, and then these divers came up and started yelling at me.", + "length": 69 + }, + { + "text": "photographs of Mayer proudly standing with the large octopus and then .", + "length": 71 + }, + { + "text": "grinning Mayer posted images of himself measuring the now dead octopus .", + "length": 72 + }, + { + "text": "The animals are not protected and their population is said to be healthy.", + "length": 73 + }, + { + "text": "watched in disbelief as he and his friend tossed the still live 30 pound .", + "length": 74 + }, + { + "text": "justify to the diving community why he hunted the gentle and intelligent .", + "length": 74 + }, + { + "text": "‘I don't have an issue with hunting,’ Mr Bailey told the Seattle Times.", + "length": 75 + }, + { + "text": "Dylan Mayer measures out the dead octopus on the floor of his garage in Seattle .", + "length": 81 + }, + { + "text": "'I think the timing, manner and place where the harvest occurred may be the issue.", + "length": 82 + }, + { + "text": "Seattle Times also reports Myers having had a permit to catch shellfish in the area.", + "length": 84 + }, + { + "text": "You need to be sensitive to other drivers and people if you're going to be a sportsman.", + "length": 87 + }, + { + "text": "' Hunting octopus is legal in Central Puget Sound, with a maximum catch of one per day.", + "length": 87 + }, + { + "text": "You don't kill a deer while kids are viewing it, and I think it's a similar problem here.", + "length": 89 + }, + { + "text": "' 'They’re incredibly intelligent, curious, very playful,' said avid diver Drew Collins.", + "length": 90 + }, + { + "text": "' Today, however, he says if he could go back to that day he would have done things differently.", + "length": 96 + }, + { + "text": "'I even had a game warden come over and look at it, and even they said there was no problem with it.", + 
"length": 100 + }, + { + "text": "’ But he added: ‘People come from all over the world to dive here and see the octopus that live here.", + "length": 105 + }, + { + "text": "Catching the octopus for a friend' s class assignment, requiring him to draw something from nature, he said: .", + "length": 110 + }, + { + "text": "'We believe this area may merit additional restrictions to enhance the traditional uses of this popular beach.", + "length": 110 + }, + { + "text": "It is not whether you hunt, it is where you hunt, and there are appropriate and inappropriate places to do that.", + "length": 112 + }, + { + "text": "Mr Mayer said he planned to eat to octopus and that catching it was no different than catching an eating a fish .", + "length": 113 + }, + { + "text": "The uproar caused by Mayer has led to the 19-year-old receiving dozens of threatening phone calls and abusive emails.", + "length": 117 + }, + { + "text": "Legal: Game wardens said the catch was completely legal and that the diver was within his rights to harvest the animal .", + "length": 120 + }, + { + "text": "Fresh catch: Mr Mayer is photographed after loading the live cephalopod into the back of his pickup truck on October 31st .", + "length": 123 + }, + { + "text": "' Indeed, Mayer told Komo News that he has now been banned from several diver shops in the Seattle area because of the octopus hunt.", + "length": 132 + }, + { + "text": "The new Marine Protected Area in Central Puget Sound will require the commission's vote for the new protected area and could take months.", + "length": 137 + }, + { + "text": "Among their considerations are designating Seacrest Park as a marine protected area or prohibiting hunting the animals anywhere in the state.", + "length": 141 + }, + { + "text": "He said that his dream of becoming a rescue-diver is in jeopardy because he has been banned from several diving schools because of his actions.", + "length": 143 + }, + { + "text": "Controversy: Outcry after Dylan 
Mayer's catch of an octopus, seen proudly displayed in his arms, has launched a potential ban on their hunt statewide .", + "length": 151 + }, + { + "text": "'I didn’t know they were so beloved, or I wouldn’t have done it,' he said according to a WDFW release that reported him signing the petition as well.", + "length": 153 + }, + { + "text": "It was writhing around and they were wrestling with it,' said Bob Bailey, a dive instructor who witnessed Mayer bring the octopus on shore with one of his students.", + "length": 164 + }, + { + "text": "The wannabe rescue diver has said that he has been demonized by the local scuba community and has had to deny claims that the octopus was sitting on eggs when he captured her.", + "length": 175 + }, + { + "text": "Local activists photographed Mayer's catch and posted the photos on their website, urging outraged residents to sign a petition banning the harvesting of giant Pacific octopuses.", + "length": 178 + }, + { + "text": "But on October 31st, the sight of Dylan Mayer punching and beating the 30-pound female on shore before throwing it in his truck bed and driving off easily unnerved many residents.", + "length": 179 + }, + { + "text": "’ Earlier this month Scott Lundy, a member of the Washington Scuba Alliance, presented the WDFW a petition signed by 5,000 divers supporting a ban on killing octopuses at Seacrest Park.", + "length": 187 + }, + { + "text": "'The harvesting of this animal has resulted in a strong, negative reaction from the public and the dive community,' Washington Department of Fish and Wildlife Director Phil Anderson said in a release.", + "length": 200 + }, + { + "text": "National outcry over a 19-year-old Seattle diver who caught and killed an octopus while publically boasting of his kill has pressed lawmakers one step closer to banning their hunt potentially statewide.", + "length": 202 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5022558569908142 + }, + "name_replaced": false, + 
"performance": { + "file_detection_time": "2025-12-23T09:32:12.440458723Z", + "first_section_created": "2025-12-23T09:32:12.440824637Z", + "last_section_published": "2025-12-23T09:32:12.441191052Z", + "all_results_received": "2025-12-23T09:32:12.562409595Z", + "output_generated": "2025-12-23T09:32:12.562576802Z", + "total_processing_time_ms": 122, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 121, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:12.440824637Z", + "publish_time": "2025-12-23T09:32:12.441055247Z", + "first_worker_start": "2025-12-23T09:32:12.441729974Z", + "last_worker_end": "2025-12-23T09:32:12.531747Z", + "total_journey_time_ms": 90, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:12.441726474Z", + "start_time": "2025-12-23T09:32:12.441792477Z", + "end_time": "2025-12-23T09:32:12.441889181Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:12.442082Z", + "start_time": "2025-12-23T09:32:12.442212Z", + "end_time": "2025-12-23T09:32:12.531747Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 89 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:12.441666172Z", + "start_time": "2025-12-23T09:32:12.441742075Z", + "end_time": "2025-12-23T09:32:12.443495846Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 1 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:12.441658271Z", + "start_time": "2025-12-23T09:32:12.441729974Z", + "end_time": "2025-12-23T09:32:12.441818978Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:12.44112005Z", + "publish_time": "2025-12-23T09:32:12.441191052Z", + "first_worker_start": "2025-12-23T09:32:12.441921682Z", + "last_worker_end": 
"2025-12-23T09:32:12.561501Z", + "total_journey_time_ms": 120, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:12.441945983Z", + "start_time": "2025-12-23T09:32:12.442013986Z", + "end_time": "2025-12-23T09:32:12.442030787Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:12.442145Z", + "start_time": "2025-12-23T09:32:12.442273Z", + "end_time": "2025-12-23T09:32:12.561501Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 119 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:12.441894781Z", + "start_time": "2025-12-23T09:32:12.442198493Z", + "end_time": "2025-12-23T09:32:12.442241695Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:12.441895181Z", + "start_time": "2025-12-23T09:32:12.441921682Z", + "end_time": "2025-12-23T09:32:12.441939183Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 208, + "min_processing_ms": 89, + "max_processing_ms": 119, + "avg_processing_ms": 104, + "median_processing_ms": 119, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 1, + "min_processing_ms": 0, + "max_processing_ms": 1, + "avg_processing_ms": 0, + "median_processing_ms": 1, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2895, + "slowest_section_id": 1, + "slowest_section_time_ms": 120 + } +} diff --git a/data/output/0006a5e48ca87ba69b2eb1cdbce272a44decd7d3.json b/data/output/0006a5e48ca87ba69b2eb1cdbce272a44decd7d3.json new file mode 100644 index 0000000..de6f6de --- /dev/null +++ b/data/output/0006a5e48ca87ba69b2eb1cdbce272a44decd7d3.json @@ -0,0 +1,206 @@ +{ + "file_name": "0006a5e48ca87ba69b2eb1cdbce272a44decd7d3.txt", + "total_words": 261, + "top_n_words": [ + { + "word": "the", + "count": 31 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "man", + "count": 9 + }, + { + "word": "meat", + "count": 8 + }, + { + "word": "his", + "count": 7 + }, + { + "word": "it", + "count": 7 + }, + { + "word": "bird", + "count": 6 + }, + { + "word": "with", + "count": 6 + }, + { + "word": "and", + "count": 5 + }, + { + "word": "as", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Kookaburras have extremely strong beaks, which they use to pound meat to make it easier to eat .", + "length": 96 + }, + { + "text": "Incredible footage has surfaced of a Queensland man feeding a kookaburra directly from his mouth.", + "length": 97 + }, + { + "text": "The buoyant bird dangling by the strip of meat as the man holds his hands to protect the bird if it falls.", + "length": 106 + }, + { + "text": "Even young kookaburras have extremely strong beaks, which they use to pound meat to make it easier to eat.", + "length": 106 + }, + { + "text": "But when he lifts it up and dangles it down, the bird becomes placid and patiently waits for the man to let go.", + "length": 111 + }, + { + "text": "It begins with the man standing on a balcony saying to the camera: ‘feeding the Kookaburras… 
Mudgeeraba style.", + "length": 114 + }, + { + "text": "As the man leans over the balcony and drops the meat from his mouth, the hungry kookaburra soars away with his new catch.", + "length": 121 + }, + { + "text": "’ As he lowers the strip of meat within the reach of the kookaburra, the bird begins squawking uncontrollable and flapping its wings.", + "length": 135 + }, + { + "text": "After witnessing some Kookaburras fighting over meat with each other, the sunglass wearing man decided to see if they would play along with him.", + "length": 144 + }, + { + "text": "The vision, which comes from Mudgeeraba in southeast Queensland, shows the buoyant bird dangling by the strip of meat as the man holds his hands to protect the bird if it falls.", + "length": 177 + }, + { + "text": "The video begins with the man standing on a balcony saying to the camera: ‘feeding the Kookaburras… Mudgeeraba style’ As the man leans over the balcony and drops the meat from his mouth, the hungry kookaburra soars away with his new catch .", + "length": 246 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.36280739307403564 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:12.94193277Z", + "first_section_created": "2025-12-23T09:32:12.942248583Z", + "last_section_published": "2025-12-23T09:32:12.94243069Z", + "all_results_received": "2025-12-23T09:32:13.009217714Z", + "output_generated": "2025-12-23T09:32:13.009325618Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:12.942248583Z", + "publish_time": "2025-12-23T09:32:12.94243069Z", + "first_worker_start": "2025-12-23T09:32:12.943135919Z", + "last_worker_end": "2025-12-23T09:32:13.008371Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": 
"2025-12-23T09:32:12.94314682Z", + "start_time": "2025-12-23T09:32:12.943201122Z", + "end_time": "2025-12-23T09:32:12.943238523Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:12.943317Z", + "start_time": "2025-12-23T09:32:12.943499Z", + "end_time": "2025-12-23T09:32:13.008371Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:12.943065116Z", + "start_time": "2025-12-23T09:32:12.943135919Z", + "end_time": "2025-12-23T09:32:12.943200622Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:12.943085917Z", + "start_time": "2025-12-23T09:32:12.943137719Z", + "end_time": "2025-12-23T09:32:12.94316362Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1486, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/0006b3b15aeb12b0e724f5af8d3c81f19f99b3d3.json b/data/output/0006b3b15aeb12b0e724f5af8d3c81f19f99b3d3.json new file mode 100644 index 0000000..f940cd6 --- /dev/null +++ b/data/output/0006b3b15aeb12b0e724f5af8d3c81f19f99b3d3.json @@ -0,0 +1,378 @@ +{ + "file_name": "0006b3b15aeb12b0e724f5af8d3c81f19f99b3d3.txt", + "total_words": 698, + "top_n_words": [ + { + "word": "the", + "count": 45 + }, + { + "word": "to", + "count": 25 + }, + { + "word": "and", + "count": 21 + }, + { + "word": "her", + "count": 21 + }, + { + "word": "was", + "count": 21 + }, + { + "word": "a", + "count": 20 + }, + { + "word": "kenney", + "count": 16 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "she", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "It .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "'She .", + "length": 6 + }, + { + "text": "wrote.", + "length": 6 + }, + { + "text": "Kenney.", + "length": 7 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "reporters.", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Helicopter .", + "length": 12 + }, + { + "text": "did to find Kaitlin.", + "length": 20 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "10:45 EST, 2 April 2013 .", + "length": 25 + }, + { + "text": "12:21 EST, 2 April 2013 .", + "length": 25 + }, + { + "text": "fullest, always with a smile.", + "length": 29 + }, + { + "text": "out there and willing to explore.", + "length": 33 + }, + { + "text": "At the time of her disappearance, .", + "length": 35 + }, + { + "text": "was reported missing, but to no avail.", + "length": 38 + }, + { + "text": "' 'She 
shared her spark of life and her .", + "length": 41 + }, + { + "text": "We were blessed to have her in our lives.", + "length": 41 + }, + { + "text": "Thanks to them for bringing her home to us.", + "length": 43 + }, + { + "text": "It was Kenney's first and last trip to the Grand Canyon.", + "length": 56 + }, + { + "text": "'She never did anything that was ordinary and she was always .", + "length": 62 + }, + { + "text": "'We are very grateful to the many professionals from the Grand .", + "length": 64 + }, + { + "text": "'The Grand Canyon trip was an adventure she wanted to experience.", + "length": 65 + }, + { + "text": "beauty and her kindness and free spirit with everybody,' she told .", + "length": 67 + }, + { + "text": "Kenney's mother said she feared that her daughter had accidentally .", + "length": 68 + }, + { + "text": "was a source of love, joy and friendship to so many,' Linnea Kenney .", + "length": 69 + }, + { + "text": "fallen into the frigid water while going to the bathroom in the dark.", + "length": 69 + }, + { + "text": "taken by helicopter to a medical examiner, who confirmed that it was .", + "length": 70 + }, + { + "text": "was below zero the night Kenney disappeared, and the 21-year-old woman .", + "length": 72 + }, + { + "text": "body spotted by rafters at about river mile 165 near Tuckup Canyon was .", + "length": 72 + }, + { + "text": "Surprise Valley and along the Colorado River for two days after Kenney .", + "length": 72 + }, + { + "text": "and ground search parties scoured the Tapeats and Deer Creek drainages, .", + "length": 73 + }, + { + "text": "Canyon National Park and the river rafting community for everything they .", + "length": 74 + }, + { + "text": "The plan was for the campers to float 280 miles from Lee’s Ferry to Pearce Ferry.", + "length": 83 + }, + { + "text": "did not have her coat on at the time, according to her family, The Denver Channel reported.", + "length": 91 + }, + { + "text": "The 21-year-old Englewood resident 
was last seen on the night of January 11 at a rafting camp.", + "length": 94 + }, + { + "text": "Gone: Kenny was rafting through the Grand Canyon's waterways when she vanished without a trace .", + "length": 96 + }, + { + "text": "' Kenney was on a month-long rafting trip with a dozen friends through the Grand Canyon during winter break.", + "length": 108 + }, + { + "text": "'She was never one to shy away from challenges and the outdoors,' Linnea Kenney told the paper of her daughter.", + "length": 111 + }, + { + "text": "Her mother wrote on Facebook after her disappearance: 'She was our balloon let loose in a room and lived life to it's .", + "length": 119 + }, + { + "text": "Last week, Kenney's mother, Linnea, wrote on Facebook about getting the news that her daughter's remains have been recovered.", + "length": 125 + }, + { + "text": "Searching: Kenney was last seen wearing a tan coat and tan pants, a grey shirt, multi-colored scarf, black knit cap and hiking boots .", + "length": 134 + }, + { + "text": "' Mourning: Though no body has been recovered mother Linnea believes her daugher fell into a river and drowned while on a rafting trip .", + "length": 136 + }, + { + "text": "Free spirit: Kenney was an anthropology major at University of Montana and an accomplished musician who played the fiddle and the mandolin .", + "length": 140 + }, + { + "text": "The search for a 21-year-old Colorado woman who went missing in January while on a Grand Canyon rafting trip came to a tragic conclusion this week.", + "length": 147 + }, + { + "text": "An accomplished musician, Kenney would talk about how she needed to bring her mandolin along on the rating trip, which coincided with her 21st birthday.", + "length": 152 + }, + { + "text": "According to a National Park Service press release, the body of a woman that was spotted in the Colorado River March 21 has been identified as Kaitlin Anne Kenney.", + "length": 163 + }, + { + "text": "Worst fear confirmed: Officials have 
announced that the body found in the Colorado River in late March is that of Kaitlin Anne Kenney, 21, who went missing in January .", + "length": 168 + }, + { + "text": "in anticipation of her great adventure, the college student spoke with exuberance to her friends about leading the life of a 'river rat' and surviving in the wilderness.", + "length": 169 + }, + { + "text": "Kenney, an anthropology major at University of Montana, was a talented musician who won first place in the young adult category at the 2011 Colorado Fiddle Championships.", + "length": 170 + }, + { + "text": "Life cut short: Kenney was passionate about the great outdoors and was excited to go on the 280-mile rafting trip, her first ever, which coincided with her 21st birthday .", + "length": 171 + }, + { + "text": "The young woman decided to go on the fateful trip after completing a summer semester with the Wild Rockies Filed Institute, which had sparked her interest to further explore the great outdoors, according to The Montana Kaimin.", + "length": 226 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.49335935711860657 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:13.443198809Z", + "first_section_created": "2025-12-23T09:32:13.443540823Z", + "last_section_published": "2025-12-23T09:32:13.443743731Z", + "all_results_received": "2025-12-23T09:32:13.511186381Z", + "output_generated": "2025-12-23T09:32:13.511367089Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:13.443540823Z", + "publish_time": "2025-12-23T09:32:13.443743731Z", + "first_worker_start": "2025-12-23T09:32:13.444334956Z", + "last_worker_end": "2025-12-23T09:32:13.510264Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": 
"2025-12-23T09:32:13.444348156Z", + "start_time": "2025-12-23T09:32:13.444422059Z", + "end_time": "2025-12-23T09:32:13.444511763Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:13.444723Z", + "start_time": "2025-12-23T09:32:13.444871Z", + "end_time": "2025-12-23T09:32:13.510264Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:13.444318655Z", + "start_time": "2025-12-23T09:32:13.444393458Z", + "end_time": "2025-12-23T09:32:13.444539664Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:13.444257752Z", + "start_time": "2025-12-23T09:32:13.444334956Z", + "end_time": "2025-12-23T09:32:13.444393858Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4038, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/00077395f92430e209a0b3f781b143b5e9af2348.json b/data/output/00077395f92430e209a0b3f781b143b5e9af2348.json new file mode 100644 index 0000000..575fda7 --- /dev/null +++ b/data/output/00077395f92430e209a0b3f781b143b5e9af2348.json @@ -0,0 +1,508 @@ +{ + "file_name": "00077395f92430e209a0b3f781b143b5e9af2348.txt", + "total_words": 1519, + "top_n_words": [ + { + "word": "the", + "count": 97 + }, + { + "word": "of", + "count": 49 + }, + { + "word": "a", + "count": 48 + }, + { + "word": "to", + "count": 48 + }, + { + "word": "in", + "count": 23 + }, + { + "word": "it", + "count": 22 + }, + { + "word": "and", + "count": 16 + }, + { + "word": "for", + "count": 16 + }, + { + "word": "s", + "count": 16 + }, + { + "word": "shirley", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "He had a point.", + "length": 15 + }, + { + "text": "I feel betrayed.", + "length": 16 + }, + { + "text": "People are really livid.", + "length": 24 + }, + { + "text": "It just makes my blood boil.", + "length": 28 + }, + { + "text": "It will rip the heart out of our community.", + "length": 43 + }, + { + "text": "That makes them hypocrites as well as NIMBYs.", + "length": 45 + }, + { + "text": "But details of the deal are hard to ascertain.", + "length": 46 + }, + { + "text": "An organiser tells me: 'It was for her own safety.", + "length": 50 + }, + { + "text": "Petty though it seems, they've barely spoken since.", + "length": 51 + }, + { + "text": "Many see it as emblematic of a wider national malaise.", + "length": 54 + }, + { + "text": "He did not respond to the Mail's requests for comment.", + "length": 54 + }, + { + "text": "'Philip signed this silly deal without telling Augusta.", + "length": 55 + }, + { + "text": "Locals are calling it 'a battle for Shakespeare country'.", + "length": 57 + }, + { + 
"text": "'This will give us 80 in one go, in a highly visible spot.", + "length": 58 + }, + { + "text": "' Much personal anger is directed at Philip Shirley himself.", + "length": 60 + }, + { + "text": "'A lot of this is about class,' said a source close to the family.", + "length": 66 + }, + { + "text": "They held angry public meetings, signed petitions, met politicians.", + "length": 67 + }, + { + "text": "'They all have connections to the party and many of them are donors.", + "length": 68 + }, + { + "text": "Ettington residents against the proposed development in the village.", + "length": 68 + }, + { + "text": "Some extended family members now won't vote Conservative,' says one.", + "length": 68 + }, + { + "text": "The area's Tory MP, Nadhim Zahawi, who backs the project's opponents .", + "length": 70 + }, + { + "text": "Almost 200,000 of those will be on supposedly protected Green Belt land.", + "length": 72 + }, + { + "text": "For Philip Shirley finds himself at the centre of a very public skirmish.", + "length": 73 + }, + { + "text": "Ill-feeling over the proposal has united the local community in opposition.", + "length": 75 + }, + { + "text": "Sally Rawles, left, Louise Whiteley and Simon Pipe are pictured in the foreground .", + "length": 83 + }, + { + "text": "However, a friend of Mrs Shirley told me that her sympathies lie with the villagers.", + "length": 84 + }, + { + "text": "Indeed, I understand that his wife, Augusta, is vehemently opposed to the development.", + "length": 86 + }, + { + "text": "He's a greedy clot for doing it and she is very, very cross about it,' said the friend.", + "length": 87 + }, + { + "text": "' Which is awkward for the area's Tory MP, Nadhim Zahawi, who backs the project's opponents.", + "length": 92 + }, + { + "text": "'There are lots of Tory landowners benefiting from this planning loophole,' says Sally Rawles.", + "length": 94 + }, + { + "text": "The rule was supposed to encourage local government bureaucrats to create 
housing plans promptly.", + "length": 97 + }, + { + "text": "More than 400 people — one for every house in Ettington — have filed written objections to it.", + "length": 98 + }, + { + "text": "'Stratford council have said our village should have no more than 50 new homes between now and 2031.", + "length": 100 + }, + { + "text": "wind of it, Mr Shirley not only refused to comment but even refused to confirm that he owned the field.", + "length": 103 + }, + { + "text": "Multi-millionaire Philip Shirley is by many measures the most impeccably bred landowner in the country .", + "length": 104 + }, + { + "text": "This unassuming multi-millionaire is by many measures the most impeccably bred landowner in the country.", + "length": 104 + }, + { + "text": "'If they owned a field and were offered a chance to turn it into a few million quid, I bet they'd accept.", + "length": 105 + }, + { + "text": "Today, some 400 years later, talk of that 'valiant' spirit brings a wry smile to the lips of local people.", + "length": 106 + }, + { + "text": "In at least eight villages around Stratford-upon-Avon, developments of between 20 and 100 homes are mooted.", + "length": 107 + }, + { + "text": "Land Registry documents confirm that it belongs to a family trust based in the Isle of Man for tax purposes.", + "length": 108 + }, + { + "text": "Shirley, in partnership with a building company, plans to develop it to create several dozen 'executive' homes.", + "length": 111 + }, + { + "text": "The battle has not just divided the village but caused tensions among the members of Shirley's immediate family.", + "length": 112 + }, + { + "text": "The front view of Ettington Park, which sits on the banks of the river Stour in the heart of Shakespeare country .", + "length": 114 + }, + { + "text": "Augusta Shirley had agreed to attend the event as a guest of James Holloway, a local agent for the Conservative Party.", + "length": 118 + }, + { + "text": "For the longer the loophole remains open, the 
angrier the voters of Ettington, and many other villages, will surely get.", + "length": 120 + }, + { + "text": "It revolves around some six acres of farmland that he owns on the edge of Ettington, a couple of miles from the family seat.", + "length": 124 + }, + { + "text": "However, a few hours before it began she was advised not to attend due to the 'level of hostility' her presence might generate.", + "length": 127 + }, + { + "text": "As the CPRE revealed this week, 38 major developments, involving 8,700 new homes, were given the green light on appeal last year.", + "length": 129 + }, + { + "text": "She was said to be 'very cross' about her husband's decision and 'feels passionately' that planning permission should be refused.", + "length": 129 + }, + { + "text": "The other day Mr Zahawi, a No 10 policy adviser, published an open letter to the Government calling for the loophole to be closed.", + "length": 130 + }, + { + "text": "After a planning application was submitted last month, they began bombarding Stratford-upon-Avon's district council with complaints.", + "length": 132 + }, + { + "text": "It declared that the 'physical harm' being done by 'rapacious developers' threatens 'to become the defining legacy of this Government'.", + "length": 135 + }, + { + "text": "' Hostilities nearly spilled over at the White Horse pub last month when SHAPE held a quiz night to raise funds to oppose the development.", + "length": 138 + }, + { + "text": "'They have always run the village so we never thought they would do something to ruin it, especially when they already have so much money.", + "length": 138 + }, + { + "text": "Shirley's paternal ancestors have managed to hold land continuously in the 'manor' where Ettington Hall stands for almost a thousand years.", + "length": 139 + }, + { + "text": "This site — a field crossed by a well-trodden footpath, with views of the Cotswold Hills — is the subject of a fierce planning dispute.", + "length": 139 + }, + { + "text": 
"'It's the scale that is so outrageous,' says Sally Rawles, the leader of SHAPE, a lobby group with 200 local members opposing the development.", + "length": 142 + }, + { + "text": "For as the Campaign to Protect Rural England (CPRE) revealed this week, a staggering 729,000 homes are to be built on greenfield sites such as this.", + "length": 148 + }, + { + "text": "Most were in the constituencies of Tory MPs, leading some to wonder if anger over the issue could cost the party marginal seats at next year's election.", + "length": 152 + }, + { + "text": "'I believe two branches of the family fell out years ago when Philip, or one of his close relatives, wasn't invited to David Cameron's fifth birthday party.", + "length": 156 + }, + { + "text": "So evocative is the family history that William Shakespeare, an occasional house-guest at Ettington Park, wrote gushingly about 'the spirit of valiant Shirley'.", + "length": 160 + }, + { + "text": "A cousin by marriage to David Cameron, he is almost the only person in England whose can trace his lineage back to the Domesday Book by uninterrupted male descent.", + "length": 163 + }, + { + "text": "Locals, when they found out in September, were outraged by the project, which would increase the size of the pretty village, which has two pubs and one shop, by a fifth.", + "length": 169 + }, + { + "text": "A few miles outside Stratford-upon-Avon, on the banks of the river Stour in the heart of Shakespeare country, sits a spectacular neo-Gothic mansion called Ettington Park.", + "length": 170 + }, + { + "text": "' When I called at the farmhouse where the Shirley family live (these days they lease Ettington Park to a luxury hotel chain), Augusta also declined to discuss the affair.", + "length": 171 + }, + { + "text": "In 40 acres of parkland and filled with suits of armour, oil paintings and a selection of resident ghosts, the stately home is the family seat of 58-year-old Philip Shirley.", + "length": 173 + }, + { + "text": 
"'People always want to blame the landowner, but new homes have to go somewhere and half of these villagers who so oppose the development live on an estate built 15 years ago.", + "length": 174 + }, + { + "text": "'We never thought the Shirleys would do something like this,' says Mary-Ann Warmington, 56, whose grandmother, a parlourmaid, was one of many locals who worked at Ettington Park.", + "length": 178 + }, + { + "text": "Sir William Stratford Dugdale, Mr Shirley's uncle, is a wealthy businessman married to David Cameron's aunt Cecilia, although friends deny that the landowner has a close relationship with the PM.", + "length": 195 + }, + { + "text": "The village is dominated by banners and signs opposing the proposed estate, claiming that it will increase traffic, destroy a well-used footpath and place an intolerable strain on the local school.", + "length": 197 + }, + { + "text": "You probably haven't heard of Mr Shirley, a tall, somewhat shy accountant with a wife and four children, who inherited a large tranche of Warwickshire on the death of his father, Major John, in 2009.", + "length": 199 + }, + { + "text": "The trouble here began last year, when Philip Shirley struck a multi-million-pound deal to allow a developer, Charles Church Homes, to build 80 homes on the field, which has been in his family for generations.", + "length": 209 + }, + { + "text": "' I understand that Mr Shirley has not actually sold the field yet, but instead granted Charles Church Homes a legally binding 'option' to buy it, for several million pounds, provided planning permission is granted.", + "length": 215 + }, + { + "text": "It was decreed that, if no such plan was in place, home-builders who were refused planning permission for a project but appealed against that decision to central government would be likely to gain automatic consent.", + "length": 215 + }, + { + "text": "As regards the planning battle, though, similar disputes are playing out across rural Britain thanks to 
the housing boom, a loophole in planning regulations and the incompetence of more than half of our local authorities.", + "length": 221 + }, + { + "text": "Some of the building applications go back to 2010, when the Government altered planning law to require every local authority to create a 'core strategy' outlining where an allotted number of new homes should be built in the coming years.", + "length": 237 + }, + { + "text": "But three years after it came into operation, 178 of the country's 327 district councils still haven't managed to complete their 'core strategy' — temporarily giving developers carte blanche to build on almost any rural land they can buy.", + "length": 240 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4948985427618027 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:13.944956668Z", + "first_section_created": "2025-12-23T09:32:13.946481431Z", + "last_section_published": "2025-12-23T09:32:13.946795343Z", + "all_results_received": "2025-12-23T09:32:14.057777369Z", + "output_generated": "2025-12-23T09:32:14.058021379Z", + "total_processing_time_ms": 113, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 110, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:13.946481431Z", + "publish_time": "2025-12-23T09:32:13.946673638Z", + "first_worker_start": "2025-12-23T09:32:13.947423469Z", + "last_worker_end": "2025-12-23T09:32:14.056918Z", + "total_journey_time_ms": 110, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:13.947429169Z", + "start_time": "2025-12-23T09:32:13.947501072Z", + "end_time": "2025-12-23T09:32:13.947612277Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:13.947704Z", + "start_time": "2025-12-23T09:32:13.947851Z", + "end_time": 
"2025-12-23T09:32:14.056918Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 109 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:13.947491272Z", + "start_time": "2025-12-23T09:32:13.947552474Z", + "end_time": "2025-12-23T09:32:13.947731182Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:13.947334665Z", + "start_time": "2025-12-23T09:32:13.947423469Z", + "end_time": "2025-12-23T09:32:13.947499272Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:13.94670394Z", + "publish_time": "2025-12-23T09:32:13.946795343Z", + "first_worker_start": "2025-12-23T09:32:13.947552074Z", + "last_worker_end": "2025-12-23T09:32:14.030719Z", + "total_journey_time_ms": 84, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:13.947530873Z", + "start_time": "2025-12-23T09:32:13.947575675Z", + "end_time": "2025-12-23T09:32:13.947648278Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:13.947859Z", + "start_time": "2025-12-23T09:32:13.947976Z", + "end_time": "2025-12-23T09:32:14.030719Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 82 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:13.947494472Z", + "start_time": "2025-12-23T09:32:13.947552074Z", + "end_time": "2025-12-23T09:32:13.947739682Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:13.947531673Z", + "start_time": "2025-12-23T09:32:13.947579375Z", + "end_time": "2025-12-23T09:32:13.947642178Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + 
"total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 191, + "min_processing_ms": 82, + "max_processing_ms": 109, + "avg_processing_ms": 95, + "median_processing_ms": 109, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4429, + "slowest_section_id": 0, + "slowest_section_time_ms": 110 + } +} diff --git a/data/output/0007967cc5d659a3dd74bf3711e2290ba5aaf7fe.json b/data/output/0007967cc5d659a3dd74bf3711e2290ba5aaf7fe.json new file mode 100644 index 0000000..fc4422b --- /dev/null +++ b/data/output/0007967cc5d659a3dd74bf3711e2290ba5aaf7fe.json @@ -0,0 +1,226 @@ +{ + "file_name": "0007967cc5d659a3dd74bf3711e2290ba5aaf7fe.txt", + "total_words": 298, + "top_n_words": [ + { + "word": "the", + "count": 20 + }, + { + "word": "is", + "count": 9 + }, + { + "word": "on", + "count": 9 + }, + { + "word": "fabregas", + "count": 8 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "chelsea", + "count": 7 + }, + { + "word": "of", + "count": 6 + }, + { + "word": "to", + "count": 6 + }, + { + "word": "at", + "count": 5 + }, + { + "word": "ferencvaros", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Pa 
Reporter .", + "length": 13 + }, + { + "text": "Mourinho said on chelseafc.", + "length": 27 + }, + { + "text": "com: 'Frank is the history of the club, history nobody can forget.", + "length": 66 + }, + { + "text": "VIDEO Scroll down to watch Cesc Fabregas score amazing individual goal vs Ferencvaros .", + "length": 87 + }, + { + "text": "Star man: Fabregas (centre) has impressed during Chelsea's pre-season tour around Europe .", + "length": 90 + }, + { + "text": "'Fabregas is the future; history is history but the future is more important at the moment.", + "length": 91 + }, + { + "text": "On the run: Summer signing Cesc Fabregas takes on Ferencvaros' defence at the Groupama Arena .", + "length": 94 + }, + { + "text": "Looking ahead: Chelsea boss Jose Mourinho believes Fabregas is the right man to replace Lampard .", + "length": 97 + }, + { + "text": "Injured: Didier Drogba holds his right ankle after hurting himself in the 2-1 win against Ferencvaros .", + "length": 103 + }, + { + "text": "' Chelsea returned from Hungary late on Sunday night ahead of their match with Real Sociedad on Tuesday.", + "length": 104 + }, + { + "text": "'My opinion, and the opinion of my players, is that Fabregas is the right player to occupy that central position in midfield.", + "length": 125 + }, + { + "text": "Chelsea manager Jose Mourinho believes Cesc Fabregas is perfectly suited to filling the position Frank Lampard occupied for 13 years.", + "length": 133 + }, + { + "text": "It is still too soon to know the full extent of the ankle injury which forced Didier Drogba off during the first half against Ferencvaros.", + "length": 138 + }, + { + "text": "Fresh start: Former Chelsea midfielder Frank Lampard arrives at Wembley to watch his new side Manchester City take on Arsenal in the Community Shield .", + "length": 151 + }, + { + "text": "The 27-year-old former Arsenal captain signed from Barcelona in June after Chelsea record goalscorer Lampard left under freedom of contract at 
the end of last season.", + "length": 166 + }, + { + "text": "Fabregas scored in Chelsea's 2-1 win over Ferencvaros in Budapest on Sunday - on a day when Lampard was sitting in the stands for Manchester City after signing on loan from New York City FC - in the Blues' penultimate pre-season friendly before next Monday's Premier League opening contest at Burnley.", + "length": 301 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.44939911365509033 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:14.447571163Z", + "first_section_created": "2025-12-23T09:32:14.447869475Z", + "last_section_published": "2025-12-23T09:32:14.448014481Z", + "all_results_received": "2025-12-23T09:32:14.510510129Z", + "output_generated": "2025-12-23T09:32:14.510657235Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:14.447869475Z", + "publish_time": "2025-12-23T09:32:14.448014481Z", + "first_worker_start": "2025-12-23T09:32:14.448664607Z", + "last_worker_end": "2025-12-23T09:32:14.509509Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:14.448678608Z", + "start_time": "2025-12-23T09:32:14.44872861Z", + "end_time": "2025-12-23T09:32:14.448767711Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:14.448869Z", + "start_time": "2025-12-23T09:32:14.449017Z", + "end_time": "2025-12-23T09:32:14.509509Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:14.448671707Z", + "start_time": "2025-12-23T09:32:14.448723309Z", + "end_time": "2025-12-23T09:32:14.448777212Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:14.448612505Z", + "start_time": "2025-12-23T09:32:14.448664607Z", + "end_time": "2025-12-23T09:32:14.448696508Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1804, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/0007a066bef7bbe52e8cd787ddb210215ac0f2f8.json b/data/output/0007a066bef7bbe52e8cd787ddb210215ac0f2f8.json new file mode 100644 index 0000000..7282e72 --- /dev/null +++ b/data/output/0007a066bef7bbe52e8cd787ddb210215ac0f2f8.json @@ -0,0 +1,444 @@ +{ + "file_name": "0007a066bef7bbe52e8cd787ddb210215ac0f2f8.txt", + "total_words": 819, + "top_n_words": [ + { + "word": "the", + "count": 52 + }, + { + "word": "were", + "count": 25 + }, + { + "word": "in", + "count": 22 + }, + { + 
"word": "and", + "count": 20 + }, + { + "word": "of", + "count": 17 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "a", + "count": 16 + }, + { + "word": "they", + "count": 16 + }, + { + "word": "said", + "count": 15 + }, + { + "word": "that", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "to Sanjani.", + "length": 11 + }, + { + "text": "Apology: U.", + "length": 11 + }, + { + "text": "adult-sized.", + "length": 12 + }, + { + "text": "been buried.", + "length": 12 + }, + { + "text": "were Taliban.", + "length": 13 + }, + { + "text": "Graham Smith .", + "length": 14 + }, + { + "text": "I'm not saying they .", + "length": 21 + }, + { + "text": "'They weren't bombed .", + "length": 22 + }, + { + "text": "By then, the victims had .", + "length": 26 + }, + { + "text": "photographs of the corpses.", + "length": 27 + }, + { + "text": "'These were young Afghans - .", + "length": 29 + }, + { + "text": "and one was older, Wigston said.", + "length": 32 + }, + { + "text": "Forensic experts, who examined the .", + "length": 36 + }, + { + "text": "I'm not saying they were insurgents.", + "length": 36 + }, + { + "text": "doing lined up under that rock that day.", + "length": 40 + }, + { + "text": "the village until two days after the airstrike.", + "length": 47 + }, + { + "text": "Last updated at 4:32 PM on 15th February 2012 .", + "length": 47 + }, + { + "text": "Wigston said coalition officials did not reach .", + "length": 48 + }, + { + "text": "Wigston said: 'We may never know what they were .", + "length": 49 + }, + { + "text": "They were speaking at a press conference in Kabul .", + "length": 51 + }, + { + "text": "He said local officials showed the assessment team .", + 
"length": 52 + }, + { + "text": "They were bombed because we thought they were a threat.", + "length": 55 + }, + { + "text": "I have no doubt that they were carrying weapons,' Wigston .", + "length": 59 + }, + { + "text": "One group of eight headed for nearby mountains, Boone said.", + "length": 59 + }, + { + "text": "said, adding that it's not unusual for villagers to carry weapons.", + "length": 66 + }, + { + "text": "' Civilian casualties have long been a source of friction between the U.", + "length": 72 + }, + { + "text": "because they were Taliban, or because they were insurgents or smugglers.", + "length": 72 + }, + { + "text": "In the end, eight young Afghans lost their lives in this very sad event.", + "length": 72 + }, + { + "text": "photographs, said the victims were young teenagers around 15 years old .", + "length": 72 + }, + { + "text": "When they got cold, they gathered brush and lighted a fire to keep warm, he said.", + "length": 81 + }, + { + "text": "Along with 18 other children, the pair were pardoned by Afghan President Hamid Karzai.", + "length": 86 + }, + { + "text": "The children were apprehended in Kandahar province along with three other militants last week.", + "length": 94 + }, + { + "text": "' Coalition and Afghan security forces were searching the area for weapons and ammunition, he said.", + "length": 99 + }, + { + "text": "' People react seconds after a suicide blast targeting a Shi'ite Muslim gathering in Kabul in December.", + "length": 103 + }, + { + "text": "'They were observed moving in open terrain in a tactical fashion, clearly keeping distance from each other.", + "length": 107 + }, + { + "text": "Nato has voiced regret for the killing of eight civilians in an airstrike in eastern Afghanistan last week.", + "length": 107 + }, + { + "text": "'Despite all tactical directives being followed precisely, we now know the unfortunate result of this engagement.", + "length": 113 + }, + { + "text": "The top commander in Afghanistan 
today condemned the Taliban's use of child suicide bombers as 'utterly despicable'.", + "length": 116 + }, + { + "text": "General John Allen, the top commander in Afghanistan, has met with the provincial governor to express his condolences.", + "length": 118 + }, + { + "text": "General John Allen (pictured) was speaking after two ten-year-olds were arrested wearing explosive vests for a second time.", + "length": 123 + }, + { + "text": "Local authorities said those who died were seven boys between the ages of six and 14 and a mentally-ill young man aged around 18 to 20.", + "length": 135 + }, + { + "text": "-led international force and Afghan President Hamid Karzai, who condemned the bombing and sent a delegation to the scene to investigate.", + "length": 136 + }, + { + "text": "'In now promoting child-suicide attacks, the insurgency have forfeited any remaining support they may have had with the people of Afghanistan.", + "length": 142 + }, + { + "text": "Fearing the presence of the troops, the victims rounded up sheep and cows and moved them toward a mountainous area behind their homes, he said.", + "length": 143 + }, + { + "text": "Air Commodore Mike Wigston, the director of Nato air operations, said today that the international forces believed they were 'young teenagers' who were armed.", + "length": 158 + }, + { + "text": "Brigadier General Lewis Boone (left) and Air Commodore Mike Wigston voice regret for the killing of eight civilians in an airstrike in eastern Afghanistan last week.", + "length": 165 + }, + { + "text": "Using binoculars and other equipment, ground forces identified several groups of Afghan males leaving a village at different times and going in different directions.", + "length": 165 + }, + { + "text": "'We stand side by side with the Afghan people to do everything we can to eliminate suicide bombers of any age from the cities, towns and villages throughout the country.", + "length": 169 + }, + { + "text": "' Zalmai Ayubi, a Kandahar 
province spokesman, yesterday confirmed that the ten-year-olds were previously arrested by security forces wearing explosive vests last August.", + "length": 170 + }, + { + "text": "General Allen said in a statement:  'The cold tactic of using any human being - especially children - to conduct suicide attacks is utterly despicable, and I denounce these tactics.", + "length": 182 + }, + { + "text": "Army Brigadier General Lewis Boone, director of public affairs for the coalition, said: 'The aircraft dropped two bombs on the group that we believed to be an imminent threat to our people.", + "length": 189 + }, + { + "text": "The coalition dropped two bombs on February 8 in Najrab district in Kapisa province, after movements by eight people on the ground were assessed as a threat to Afghan police and Nato forces in the area.", + "length": 202 + }, + { + "text": "'Their purposeful movements and the weapons they were seen to be carrying led the ground commander to believe this group was getting ready to attack and were an imminent threat to the Afghan National Police and coalition forces in the valley.", + "length": 242 + }, + { + "text": "The top Nato commander in Afghanistan today condemned the Taliban's use of child suicide bombers as 'utterly despicable' Hussain Khan Sanjani, the leader of the Kapisa provincial council who visited the area, said residents told him that before the airstrike, coalition aircraft were patrolling overhead as other forces searched homes.", + "length": 335 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7062171697616577 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:14.948774799Z", + "first_section_created": "2025-12-23T09:32:14.949373624Z", + "last_section_published": "2025-12-23T09:32:14.949700637Z", + "all_results_received": "2025-12-23T09:32:15.033534255Z", + "output_generated": "2025-12-23T09:32:15.033691862Z", + "total_processing_time_ms": 84, + "file_splitting_time_ms": 
0, + "workers_processing_time_ms": 83, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:14.949373624Z", + "publish_time": "2025-12-23T09:32:14.949590132Z", + "first_worker_start": "2025-12-23T09:32:14.950251159Z", + "last_worker_end": "2025-12-23T09:32:15.025779Z", + "total_journey_time_ms": 76, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:14.95075178Z", + "start_time": "2025-12-23T09:32:14.950824983Z", + "end_time": "2025-12-23T09:32:14.950933087Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:14.951115Z", + "start_time": "2025-12-23T09:32:14.951239Z", + "end_time": "2025-12-23T09:32:15.025779Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 74 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:14.950546471Z", + "start_time": "2025-12-23T09:32:14.950603374Z", + "end_time": "2025-12-23T09:32:14.950736579Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:14.950171556Z", + "start_time": "2025-12-23T09:32:14.950251159Z", + "end_time": "2025-12-23T09:32:14.950324062Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:14.949640834Z", + "publish_time": "2025-12-23T09:32:14.949700637Z", + "first_worker_start": "2025-12-23T09:32:14.950339663Z", + "last_worker_end": "2025-12-23T09:32:15.032642Z", + "total_journey_time_ms": 83, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:14.950426166Z", + "start_time": "2025-12-23T09:32:14.950474668Z", + "end_time": "2025-12-23T09:32:14.950481969Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": 
"sentiment", + "receive_time": "2025-12-23T09:32:14.951114Z", + "start_time": "2025-12-23T09:32:14.95124Z", + "end_time": "2025-12-23T09:32:15.032642Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 81 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:14.95052217Z", + "start_time": "2025-12-23T09:32:14.950561672Z", + "end_time": "2025-12-23T09:32:14.950564572Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:14.950312962Z", + "start_time": "2025-12-23T09:32:14.950339663Z", + "end_time": "2025-12-23T09:32:14.950340263Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 155, + "min_processing_ms": 74, + "max_processing_ms": 81, + "avg_processing_ms": 77, + "median_processing_ms": 81, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2505, + "slowest_section_id": 1, + "slowest_section_time_ms": 83 + } +} diff --git a/data/output/0007e1735d8bfa91e380c9e9c18879399998d6e7.json 
b/data/output/0007e1735d8bfa91e380c9e9c18879399998d6e7.json new file mode 100644 index 0000000..41d1290 --- /dev/null +++ b/data/output/0007e1735d8bfa91e380c9e9c18879399998d6e7.json @@ -0,0 +1,274 @@ +{ + "file_name": "0007e1735d8bfa91e380c9e9c18879399998d6e7.txt", + "total_words": 417, + "top_n_words": [ + { + "word": "the", + "count": 24 + }, + { + "word": "a", + "count": 16 + }, + { + "word": "their", + "count": 12 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "s", + "count": 9 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "caryn", + "count": 7 + }, + { + "word": "family", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "Cover your ears!", + "length": 16 + }, + { + "text": "' one commenter wrote.", + "length": 22 + }, + { + "text": "'What a wonderful video!", + "length": 24 + }, + { + "text": "Congratulations on your great news!", + "length": 35 + }, + { + "text": "Other family members follow her lead.", + "length": 37 + }, + { + "text": "there's more' and a sonogram pops up.", + "length": 37 + }, + { + "text": "there's more' before a sonogram pops up .", + "length": 41 + }, + { + "text": "The Canatellas first baby is due in mid-August.", + "length": 47 + }, + { + "text": "Many viewers have applauded their novel approach.", + "length": 49 + }, + { + "text": "Tears of joy: Other family members join the celebration .", + "length": 57 + }, + { + "text": "Keep watching: A script on screen then reads 'but wait...", + "length": 57 + }, + { + "text": "Look of shock: The Canatellas first baby is due in mid-August .", + "length": 63 + }, + { + "text": "You have such a lovely family which is reflected in their reactions!!", + "length": 69 + }, + { + "text": "The couple gifted the novel film to Caryn's parents on Christmas day.", + "length": 69 + }, + { + "text": "Artisitic duo: Many viewers have applauded the couple's novel approach .", + "length": 72 + }, + { + 
"text": "Caryn's mother is seen springing up from the couch and waving her arms in the air.", + "length": 82 + }, + { + "text": "The whole family then sat down to watch the footage with their reactions caught on camera.", + "length": 90 + }, + { + "text": "Their smiling reactions were also filmed and more video updates are promised in the future.", + "length": 91 + }, + { + "text": "The couple broke their happy news to Bryan's parents by gifting them a framed pregnancy scan.", + "length": 93 + }, + { + "text": "Overjoyed: Caryn's mother is seen springing up from the couch and waving her arms in the air .", + "length": 94 + }, + { + "text": "To date the video of their surprise pregnancy announcement has been watched more than 90,000 times.", + "length": 99 + }, + { + "text": "Clever idea: They created a short film showing a montage of family clips, which comes to an abrupt end .", + "length": 104 + }, + { + "text": "Meanwhile, a younger boy in the room stands with his hands over his ears as the noisy commotion hits him.", + "length": 105 + }, + { + "text": "This is the moment a couple make a surprise pregnancy announcement to their families triggering manic squeals of joy.", + "length": 117 + }, + { + "text": "Popular pick: To date the video of the Canatellas surprise pregnancy announcement has been watched more than 90,000 times .", + "length": 124 + }, + { + "text": "They created a short film showing a montage of family clips, which comes to an abrupt end before script on screen reads 'but wait...", + "length": 132 + }, + { + "text": "Caryn and Bryan Canatella, who run a wedding videography business in Austin, Texas, decided to use their love of film and unleash their baby news via DVD.", + "length": 154 + }, + { + "text": "TV time: The couple gifted the novel film to Caryn's parents on Christmas day - The whole family then sat down to watch the footage with their reactions caught on camera .", + "length": 171 + }, + { + "text": "A very special present: Caryn and 
Bryan Canatella, who run a wedding videography business in Austin, Texas, decided to use their love of film and unleash their baby news via DVD - Caryn's mother is pictured here .", + "length": 213 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.40433216094970703 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:15.450464755Z", + "first_section_created": "2025-12-23T09:32:15.450803669Z", + "last_section_published": "2025-12-23T09:32:15.451018378Z", + "all_results_received": "2025-12-23T09:32:15.515988027Z", + "output_generated": "2025-12-23T09:32:15.516128633Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:15.450803669Z", + "publish_time": "2025-12-23T09:32:15.451018378Z", + "first_worker_start": "2025-12-23T09:32:15.451741608Z", + "last_worker_end": "2025-12-23T09:32:15.513859Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:15.45181081Z", + "start_time": "2025-12-23T09:32:15.451866913Z", + "end_time": "2025-12-23T09:32:15.451920115Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:15.45203Z", + "start_time": "2025-12-23T09:32:15.452165Z", + "end_time": "2025-12-23T09:32:15.513859Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:15.451778709Z", + "start_time": "2025-12-23T09:32:15.451843512Z", + "end_time": "2025-12-23T09:32:15.451919815Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:15.451685805Z", + "start_time": "2025-12-23T09:32:15.451741608Z", + "end_time": 
"2025-12-23T09:32:15.451780509Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2391, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/00083697263e215e5e7eda753070f08aa374dd45.json b/data/output/00083697263e215e5e7eda753070f08aa374dd45.json new file mode 100644 index 0000000..0517778 --- /dev/null +++ b/data/output/00083697263e215e5e7eda753070f08aa374dd45.json @@ -0,0 +1,258 @@ +{ + "file_name": "00083697263e215e5e7eda753070f08aa374dd45.txt", + "total_words": 541, + "top_n_words": [ + { + "word": "the", + "count": 41 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "his", + "count": 11 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "said", + "count": 10 + }, + { + "word": "was", + "count": 10 + }, + { 
+ "word": "and", + "count": 8 + }, + { + "word": "arrigoni", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "m.", + "length": 2 + }, + { + "text": "m.", + "length": 2 + }, + { + "text": "Thursday.", + "length": 9 + }, + { + "text": "\" Arrigoni's colleagues last saw him about 8 p.", + "length": 47 + }, + { + "text": "Medical sources said his body was taken to Shifa Hospital in Gaza.", + "length": 66 + }, + { + "text": "\"He was available everywhere to support all the poor people, the victims.", + "length": 73 + }, + { + "text": "The writing calls Italy an \"infidel nation whose armies are still present in Muslim lands.", + "length": 90 + }, + { + "text": "\"I think he was just the first foreigner they ran across,\" Catron said about the abductors.", + "length": 91 + }, + { + "text": "An autopsy revealed that he had been killed hours before police entered the location, it said.", + "length": 94 + }, + { + "text": "He was granted honorary citizenship for his work for the Palestinian people, the statement said.", + "length": 96 + }, + { + "text": "Wednesday near the port in Gaza City, said Joe Catron, a member of the International Solidarity Movement.", + "length": 105 + }, + { + "text": "Journalist Talal Abu-Rahmi, CNN's Yasmin Amer, Tom Watkins and Erin McLaughlin contributed to this story .", + "length": 106 + }, + { + "text": "\"It is ironic they happened to come across someone who has dedicated a part of his life to helping Palestine.", + "length": 109 + }, + { + "text": "\" Arrigoni, who was also working as a freelance journalist, was from the northern Italian region of Lombardy.", + "length": 109 + }, + { + "text": "The grisly outcome came hours after a video was posted on YouTube showing a man identified by his colleagues as Arrigoni.", + "length": 121 + }, + { + "text": "A black blindfold covered his eyes; his right cheek appeared red as though it had been hit; his hands appeared to be bound behind his back.", + "length": 139 + }, + { + "text": 
"A hand belonging to someone outside of the view of the lens appeared to be grasping his hair on the back and pointing the captive's head toward the camera.", + "length": 155 + }, + { + "text": "Police investigating the case learned where 36-year-old Vittorio Arrigoni was being held and went to the location, where they found the body, the statement said.", + "length": 161 + }, + { + "text": "\"Vittorio Arrigoni is a hero of Palestine,\" said a statement released by Khalil Shaheen, head of the economics and social rights department at the Palestinian Centre for Human Rights.", + "length": 183 + }, + { + "text": "Arabic writing scrolled over the video threatened that Arrigoni would be killed if Hesham al-Saeedni, who has been held for nearly a year by Hamas, were not released within 30 hours of 11 a.", + "length": 190 + }, + { + "text": "Al-Saeedni is the leader of a group that may have been inspired by al Qaeda, said Alfredo Tradardi, the Italy coordinator for the International Solidarity Movement, where Arrigoni was a volunteer.", + "length": 196 + }, + { + "text": "Gaza City (CNN) -- An Italian humanitarian activist and journalist who was kidnapped in Gaza has been found dead and one person is in custody, the Hamas Interior Ministry said Friday in a statement.", + "length": 198 + }, + { + "text": "\" Tradardi, who had initially expressed optimism that his co-worker would be released unharmed, told CNN in a telephone interview that the outcome underscores the need for progress to be made toward Middle East peace.", + "length": 217 + }, + { + "text": "\"Now, we have to work more deeply to try to change the foreign policy of our government, of the European government, of the United States government in order that they could press Israel to solve the problem of the Palestinians.", + "length": 228 + }, + { + "text": "\" Arrigoni had been active in the Palestinian cause for nearly a decade, and had been allied with the International Solidarity Movement for more than two 
years, \"monitoring human rights violations by Israel, supporting the Palestinian popular resistance against the Israeli occupation and disseminating information about the situation in Gaza to his home country of Italy,\" the non-governmental organization said in a written statement.", + "length": 436 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5688349008560181 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:15.951233574Z", + "first_section_created": "2025-12-23T09:32:15.951563988Z", + "last_section_published": "2025-12-23T09:32:15.951720594Z", + "all_results_received": "2025-12-23T09:32:16.011364026Z", + "output_generated": "2025-12-23T09:32:16.011656138Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:15.951563988Z", + "publish_time": "2025-12-23T09:32:15.951720594Z", + "first_worker_start": "2025-12-23T09:32:15.952413922Z", + "last_worker_end": "2025-12-23T09:32:16.010411Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:15.952377021Z", + "start_time": "2025-12-23T09:32:15.952449924Z", + "end_time": "2025-12-23T09:32:15.952511826Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:15.952555Z", + "start_time": "2025-12-23T09:32:15.952703Z", + "end_time": "2025-12-23T09:32:16.010411Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:15.952366221Z", + "start_time": "2025-12-23T09:32:15.952445624Z", + "end_time": "2025-12-23T09:32:15.952576329Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + 
"receive_time": "2025-12-23T09:32:15.952326619Z", + "start_time": "2025-12-23T09:32:15.952413922Z", + "end_time": "2025-12-23T09:32:15.952475225Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3241, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/00092a7572e6e1d254a6d5f079c4d72d33435667.json b/data/output/00092a7572e6e1d254a6d5f079c4d72d33435667.json new file mode 100644 index 0000000..222932b --- /dev/null +++ b/data/output/00092a7572e6e1d254a6d5f079c4d72d33435667.json @@ -0,0 +1,638 @@ +{ + "file_name": "00092a7572e6e1d254a6d5f079c4d72d33435667.txt", + "total_words": 1800, + "top_n_words": [ + { + "word": "the", + "count": 112 + }, + { + "word": "and", + "count": 62 + }, + { + "word": "to", + "count": 53 + }, + { + "word": "a", + "count": 42 + }, + { + "word": "of", + "count": 40 + }, + { + 
"word": "in", + "count": 28 + }, + { + "word": "on", + "count": 25 + }, + { + "word": "boone", + "count": 22 + }, + { + "word": "fire", + "count": 19 + }, + { + "word": "at", + "count": 18 + } + ], + "sorted_sentences": [ + { + "text": "com.", + "length": 4 + }, + { + "text": "estate.", + "length": 7 + }, + { + "text": "Russ Davies.", + "length": 12 + }, + { + "text": "Love to all.", + "length": 12 + }, + { + "text": "'It's unreal.", + "length": 13 + }, + { + "text": "However, Capt.", + "length": 14 + }, + { + "text": "'Life is fragile.", + "length": 17 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "We have no further comment.", + "length": 27 + }, + { + "text": "A spokesman told DailyMail.", + "length": 27 + }, + { + "text": "Two remain unaccounted for .", + "length": 28 + }, + { + "text": "The fire was reported about 3.", + "length": 30 + }, + { + "text": "Thank you everyone so very much.", + "length": 32 + }, + { + "text": "The image above was taken in 2012 .", + "length": 35 + }, + { + "text": "It has an estimated $9million value.", + "length": 36 + }, + { + "text": "'Our love for our family is boundless.", + "length": 38 + }, + { + "text": "It means so much to us all at this time.", + "length": 40 + }, + { + "text": "Make time today to embrace your loved ones.", + "length": 43 + }, + { + "text": "We ask that you respect our need for privacy.", + "length": 45 + }, + { + "text": "She described them as 'genuinely nice people'.", + "length": 46 + }, + { + "text": "She dreamed of one day running an animal rescue.", + "length": 48 + }, + { + "text": "All one big nightmare that I can't wake up from.", + "length": 48 + }, + { + "text": "It is not known whether the bodies children of adults.", + "length": 54 + }, + { + "text": "He would hold fundraisers for worthy causes at his house.", + "length": 57 + }, + { + "text": "ScienceLogic would only confirm that Pyle works for them.", + "length": 57 + }, + { + "text": "com: 'Don Pyle is the 
Chief Operating Officer for ScienceLogic.", + "length": 63 + }, + { + "text": "Some 85 firefighters from several jurisdictions fought the fire.", + "length": 64 + }, + { + "text": "Inferno: The blaze, pictured from the air, tore through the home.", + "length": 65 + }, + { + "text": "Davies said hot spots took about 10 hours to extinguish on Monday.", + "length": 66 + }, + { + "text": "Our loss demands time and quiet reflection to process these feelings.", + "length": 69 + }, + { + "text": "The blaze has reduced the 16,000-square-foot waterfront castle to ruins.", + "length": 72 + }, + { + "text": "We want our community to know how proud we are of all who have helped us.", + "length": 73 + }, + { + "text": "The bodies were with the medical examiner and have not yet been identified.", + "length": 75 + }, + { + "text": "Neighbor Caroline Wugofski said the Pyles often held parties at their home.", + "length": 75 + }, + { + "text": "family, friends, and neighbors have come together for us in our time of need.", + "length": 77 + }, + { + "text": "'There was a fire at his home last night and the case is under investigation.", + "length": 77 + }, + { + "text": "Tech tycoon Don Pyle, 56, lived in the 16,000sq ft home with his wife Sandra.", + "length": 77 + }, + { + "text": "30am on Monday by an alarm-monitoring company and a neighbor who spotted flames.", + "length": 80 + }, + { + "text": "Real estate listings for the home say it had seven bathrooms and was part of a 7.", + "length": 81 + }, + { + "text": "Six people were inside the house - two grandparents and their four grandchildren .", + "length": 82 + }, + { + "text": "I am trying to read all the messages and thank you so much for reaching out to us.", + "length": 82 + }, + { + "text": "Federal and local investigators returned to the site to continue the search on Thursday.", + "length": 88 + }, + { + "text": "Don and Sandra Pyle were home the night of the blaze along with their four grandchildren .", + "length": 90 + 
}, + { + "text": "The modern, two-storey home, which was built in 2005, also has attic space and a basement.", + "length": 90 + }, + { + "text": "Mansion fire: The $9million home in Annapolis, Maryland, caught fire around 3:30am Monday .", + "length": 91 + }, + { + "text": "Although way too short, our babies lives were incredibly happy and they were loved by so many.", + "length": 94 + }, + { + "text": "The family described Don and Sandra Pyle as loving grandparents called 'Pop-Pop' and 'Dee-Dee'.", + "length": 95 + }, + { + "text": "His latest role was as the COO of ScienceLogic, an IT support company based in Reston, Virginia.", + "length": 96 + }, + { + "text": "Bottom right, firefighters can be seen standing near the pools as they spray water at the ruins .", + "length": 97 + }, + { + "text": "They had even stopped off at a Target store on the way to pick up themed costumes for the evening.", + "length": 98 + }, + { + "text": "' The Pyles and their four grandchildren have been missing since crews were called early on Monday.", + "length": 99 + }, + { + "text": "His luscious property stood facing Church Creek, a picturesque inlet not far from the Chesapeake Bay.", + "length": 101 + }, + { + "text": "The mesage said: 'Clint and I would like to thank everyone again for all of your messages and support.", + "length": 102 + }, + { + "text": "One area flared up on Tuesday, and Davies said crews were monitoring it and extinguishing it as needed.", + "length": 103 + }, + { + "text": "Family night: Grandparents Don and Sandy Pyle had their four grandchildren over on Sunday for a sleepover .", + "length": 107 + }, + { + "text": "Firefighters took about 10 hours to extinguish hot spots on Monday and one area flared up on Tuesday again.", + "length": 108 + }, + { + "text": "Picturesque: In 2008, Pyle said the house had been built to be a combination of a castle and a beach house .", + "length": 108 + }, + { + "text": "Six-year-old Wesley looked up to his sister and wanted 
to build robots as he grew older, a family member said.", + "length": 110 + }, + { + "text": "Officials said it is unclear whether an alarm sounded inside the home, which might have alerted anyone inside.", + "length": 110 + }, + { + "text": "Even though remains of all six have not yet been found, all of those inside are believed to have died in the fire.", + "length": 114 + }, + { + "text": "The official in charge of the ATF's investigation would not confirm whether the blaze was suspicious to DailyMail.", + "length": 114 + }, + { + "text": "Don Pyle grew up in nearby Baltimore County and worked his way to the top table of several billion-dollar tech companies.", + "length": 121 + }, + { + "text": "Family members have since paid tribute to the youngsters and grandparents who are believed to have been caught up in the fire.", + "length": 126 + }, + { + "text": "I am thankful for the 16 years I had with them too, as I'm sure everyone else who had the pleasure of knowing them is as well.", + "length": 126 + }, + { + "text": "Treat: The grandparents took the grandchildren to this Medieval Times restaurant in Hannover, Maryland, hours before the fire .", + "length": 127 + }, + { + "text": "Emotional: The message put on Facebook by Eve Morrison, mother to two of the child victims, Charlotte, 8, and Wesley Boone, 6 .", + "length": 127 + }, + { + "text": "Because there was no hydrant in the area, firefighters shuttled tankers to the site and stationed a fire boat at a pier nearby.", + "length": 127 + }, + { + "text": "Four unidentified bodies have been pulled out of the wreckage so far while investigators try to determine what caused the fire.", + "length": 127 + }, + { + "text": "Three people embrace outside the gates of the home, where Investigators found the bodies of two unidentified people on Wednesday .", + "length": 131 + }, + { + "text": "Tragedy: A firefighter uses red tape to mark out areas at the Annapolis home following a blaze which entirely destroyed the 
property.", + "length": 133 + }, + { + "text": "Situated on its own portion of exclusive Childs Point Road, the home boasted a courtyard, swimming pool and access to a speedboat jetty.", + "length": 136 + }, + { + "text": "Eve Morrison, the wife of Clint Boone, and the mother of Charlotte and Wesley, posted a message to Facebook about the tragedy on Thursday.", + "length": 138 + }, + { + "text": "According to a family statement, Charlotte, eight, was a 'fun-loving and intelligent,' girl who loved making videos with her pet guinea pig.", + "length": 140 + }, + { + "text": "Special Agent David Cheplak, a spokesman for the ATF's Baltimore field office, said earlier this week there were no immediate signs of foul play.", + "length": 145 + }, + { + "text": "XOXO' The post added: 'And of course Don and Sandy 'Dee Dee and Pop Pop' the two most incredible, generous, hilarious people anyone could ever know.", + "length": 148 + }, + { + "text": "'I never knew that I could hurt this badly,' Stacey Boone, mother to Lexi and Katie with Sandra Pyle's son Randy Boone, wrote on Facebook on Tuesday.", + "length": 149 + }, + { + "text": "According to The Baltimore Sun, Randy and Stacey Boone, parents of Lexi and Katie, have a three-week-old son who was not at the Pyles' home on Sunday.", + "length": 150 + }, + { + "text": "' Investigators on Wednesday found two bodies in the charred remains of the home in Annapolis, Maryland after the property burned to the ground on Monday.", + "length": 155 + }, + { + "text": "Crews continued to search the Maryland mansion for bodies on Thursday, saying that they hoped to give the devastated families closure as soon as possible .", + "length": 155 + }, + { + "text": "Feared dead: Siblings Charlotte Boone, 8 (left), and Wesley Boone, 6, (right) were in the house with their cousins and grandparents when the fire broke out .", + "length": 157 + }, + { + "text": "The following morning the 16,000ft waterfront Annapolis property they were sleeping in 
had been burned to the ground - with all six believed to have been killed.", + "length": 161 + }, + { + "text": "Howarth said that is a common practice when there are no eyewitnesses, and it means only that anything recovered in the investigation would be admissible in court.", + "length": 163 + }, + { + "text": "Crews found two bodies on Thursday, the second day of excavating the site, and two bodies on Wednesday, according to Anne Arundel County Fire Department spokesman Capt.", + "length": 168 + }, + { + "text": "At one in 2008, he told the Baltimore Sun the house had been especially designed to be a combination of a castle, his preference and a beach house, which Sandra wanted.", + "length": 168 + }, + { + "text": "Robert Howarth, commander of the county fire department's fire and explosives investigation unit, said on Tuesday that investigators were treating the site as a crime scene.", + "length": 173 + }, + { + "text": "Photographs of the mansion before it burned down show at least four cars parked outside the elegant stone construction, which has several turrets as part of its architecture.", + "length": 174 + }, + { + "text": "Before: The 16,000sq ft property boasts two swimming pools and its own courtyard, as well as two outbuildings, one of which is connected to the main house by a covered portion.", + "length": 176 + }, + { + "text": "Four young cousins enjoyed a night out at a themed restaurant with their grandparents just hours before a deadly blaze ripped through the $9million mansion they were sleeping in.", + "length": 178 + }, + { + "text": "Trapped: Don and Sandra Pyle (pictured), were at home at the time of the fire with their grandchildren: Alexis Boone, 8; Kaitlyn Boone, 7; Charlotte Boone, 8; and Wesley Boone, 6.", + "length": 180 + }, + { + "text": "'We recognize the dedicated efforts from Anne Arundel County Fire and Police Departments, the Naval Academy, the Bureau of Alcohol, Tobacco, and Firearms, and other first responders.", + 
"length": 182 + }, + { + "text": "In a recent interview with the Washington Post, he said he started off as a salesman, and moved to progressively more important roles before taking the reins at companies with links to Silicon Valley.", + "length": 200 + }, + { + "text": "Sources at the Bureau of Alcohol, Tobacco, Firearms and Explosives told CNN that the fire engulfed the mansion so fast that they suspect that chemicals may have been used to accelerate the burning.", + "length": 200 + }, + { + "text": "Alexis Boone, eight, Kaitlyn Boone, seven, Charlotte Boone, eight, and six-year-old Wesley Boone were missing following the fire at their grandparents' Annapolis mansion, their family said Wednesday .", + "length": 202 + }, + { + "text": "Lexi, eight, was looking forward to her up and coming communion and had ambitions of being a vet or on television, while her younger sister Katie loved Taylor Smith and was 'thoughtful beyond her years'.", + "length": 203 + }, + { + "text": "Unaccounted for: Sisters Alexis 'Lexi' Boone, 8 (left), and Kaitlyn 'Katie' Boone, 7 (right), are both missing after being trapped in the Annapolis mansion of their grandparents as it caught fire early Monday .", + "length": 210 + }, + { + "text": "A previous family statement given to WUSA9 read: 'On behalf of the Boone and Pyle families, we wish to express our gratitude and appreciation for the love and support being shared with us during this tragic event.", + "length": 215 + }, + { + "text": "Special Agent William McMullan said that specialists from across the country were headed to Annapolis, and would send in trained dogs to sniff out any traces of accelerant chemicals, which would help determine whether the blaze could have been a deliberate attack.", + "length": 264 + }, + { + "text": "Don Pyle and his wife, Sandra took Alexis (Lexi) Boone, eight, Kaitlyn (Katie) Boone, seven, Charlotte Boone, eight, and Wesley (Wes) Boone, six, to a Medieval Times restaurant in Hannover, Maryland, as a 
special treat on Sunday night, according to a family spokesman.", + "length": 268 + }, + { + "text": "Historic interior: The pair even stopped at a Target nearby to pick up some fancy dress outfits to fit in with the theme of the restaurant (pictured) Wreck: Two bodies were recovered Thursday afternoon in the rubble of a multimillion-dollar mansion in Annapolis that burned to the ground earlier this week.", + "length": 307 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5432522892951965 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:16.452543115Z", + "first_section_created": "2025-12-23T09:32:16.454204283Z", + "last_section_published": "2025-12-23T09:32:16.454514996Z", + "all_results_received": "2025-12-23T09:32:16.616041382Z", + "output_generated": "2025-12-23T09:32:16.616320793Z", + "total_processing_time_ms": 163, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 161, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:16.454204283Z", + "publish_time": "2025-12-23T09:32:16.454403991Z", + "first_worker_start": "2025-12-23T09:32:16.455168522Z", + "last_worker_end": "2025-12-23T09:32:16.547212Z", + "total_journey_time_ms": 93, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:16.455087719Z", + "start_time": "2025-12-23T09:32:16.455168522Z", + "end_time": "2025-12-23T09:32:16.455303528Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:16.455584Z", + "start_time": "2025-12-23T09:32:16.455818Z", + "end_time": "2025-12-23T09:32:16.547212Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 91 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:16.455134321Z", + "start_time": "2025-12-23T09:32:16.455295427Z", + "end_time": 
"2025-12-23T09:32:16.455479735Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:16.455156822Z", + "start_time": "2025-12-23T09:32:16.455297228Z", + "end_time": "2025-12-23T09:32:16.455378831Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:16.454413191Z", + "publish_time": "2025-12-23T09:32:16.454483094Z", + "first_worker_start": "2025-12-23T09:32:16.45535103Z", + "last_worker_end": "2025-12-23T09:32:16.559432Z", + "total_journey_time_ms": 105, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:16.455552738Z", + "start_time": "2025-12-23T09:32:16.455670043Z", + "end_time": "2025-12-23T09:32:16.455776647Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:16.455731Z", + "start_time": "2025-12-23T09:32:16.455849Z", + "end_time": "2025-12-23T09:32:16.559432Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 103 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:16.455675443Z", + "start_time": "2025-12-23T09:32:16.455725545Z", + "end_time": "2025-12-23T09:32:16.455867751Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:16.455147721Z", + "start_time": "2025-12-23T09:32:16.45535103Z", + "end_time": "2025-12-23T09:32:16.455487835Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 2, + "creation_time": "2025-12-23T09:32:16.454502095Z", + "publish_time": "2025-12-23T09:32:16.454514996Z", + "first_worker_start": "2025-12-23T09:32:16.455583739Z", + "last_worker_end": "2025-12-23T09:32:16.615088Z", + "total_journey_time_ms": 160, + "worker_timings": { + "sentencesort": { + "worker_type": 
"sentencesort", + "receive_time": "2025-12-23T09:32:16.455712644Z", + "start_time": "2025-12-23T09:32:16.455729645Z", + "end_time": "2025-12-23T09:32:16.455739846Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:16.548375Z", + "start_time": "2025-12-23T09:32:16.548441Z", + "end_time": "2025-12-23T09:32:16.615088Z", + "queue_wait_time_ms": 93, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:16.455756246Z", + "start_time": "2025-12-23T09:32:16.455781147Z", + "end_time": "2025-12-23T09:32:16.455799548Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:16.455563338Z", + "start_time": "2025-12-23T09:32:16.455583739Z", + "end_time": "2025-12-23T09:32:16.45560704Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 3, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 3, + "total_processing_ms": 260, + "min_processing_ms": 66, + "max_processing_ms": 103, + "avg_processing_ms": 86, + "median_processing_ms": 91, + "total_queue_wait_ms": 95, + "avg_queue_wait_ms": 31 + }, + "topn": { + "worker_type": "topn", + "total_sections": 3, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 3, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 
0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 3, + "average_section_size": 3508, + "slowest_section_id": 2, + "slowest_section_time_ms": 160 + } +} diff --git a/data/output/00093881f35822a402c1f0f736e2c9a405d736db.json b/data/output/00093881f35822a402c1f0f736e2c9a405d736db.json new file mode 100644 index 0000000..7c30ddb --- /dev/null +++ b/data/output/00093881f35822a402c1f0f736e2c9a405d736db.json @@ -0,0 +1,444 @@ +{ + "file_name": "00093881f35822a402c1f0f736e2c9a405d736db.txt", + "total_words": 1293, + "top_n_words": [ + { + "word": "i", + "count": 40 + }, + { + "word": "to", + "count": 40 + }, + { + "word": "of", + "count": 32 + }, + { + "word": "a", + "count": 29 + }, + { + "word": "the", + "count": 28 + }, + { + "word": "and", + "count": 27 + }, + { + "word": "my", + "count": 18 + }, + { + "word": "that", + "count": 18 + }, + { + "word": "as", + "count": 16 + }, + { + "word": "in", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": ".", + "length": 1 + }, + { + "text": ".", + "length": 1 + }, + { + "text": "Cue redness.", + "length": 12 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Other than my blushing that is .", + "length": 32 + }, + { + "text": "Other responses can include a dry mouth and racing heart.", + "length": 57 + }, + { + "text": "I was worried I would get teased for being the clever one.", + "length": 58 + }, + { + "text": "Rather like the pose Princess Diana struck in early pictures.", + "length": 61 + }, + { + "text": "In the morning I spent five minutes searching for my glasses.", + "length": 61 + }, + { + "text": "On Tuesday, I managed to clock up two embarrassing moments before lunch.", + "length": 72 + }, + { + "text": "‘Women tend to score higher than men on most levels of anxiety,’ he says.", + "length": 77 + }, + { + "text": "Then in my local cafe, I bumped into an old flame I hadn’t seen since school.", + "length": 79 + }, + { + "text": "In fact, to say 
that I am easily embarrassed is something of an understatement.", + "length": 79 + }, + { + "text": "I enjoy celebrating my birthday but singing and speeches are strictly forbidden.", + "length": 80 + }, + { + "text": "A blush doesn’t just show on your face — your stomach lining also turns red .", + "length": 81 + }, + { + "text": "Certainly, my own mother was especially timid, while my dad was an outgoing charmer.", + "length": 84 + }, + { + "text": "‘You’re not in control of this physical response and you may turn red as a result.", + "length": 86 + }, + { + "text": "’ I’ve lived with my blushing for so long now that I’ve almost come to accept it.", + "length": 87 + }, + { + "text": "An easily embarrassed child, it seems, can turn even a positive situation into a negative.", + "length": 90 + }, + { + "text": "’ My hand instantly flew up to cover the pinky-hue I could feel spreading across my cheeks.", + "length": 93 + }, + { + "text": "For those of us who continue to be afflicted, blushing in itself remains a lifelong embarrassment.", + "length": 98 + }, + { + "text": "For as long as I can remember I have consistently blushed bright pink at the slightest provocation.", + "length": 99 + }, + { + "text": "But seven times per week doesn’t sound that often to me — I blush profusely at least once a day.", + "length": 100 + }, + { + "text": "Professor Edelmann says: ‘Blushing shows you have a degree of humility, and that can be attractive.", + "length": 101 + }, + { + "text": "Some 200 operations to cure blushing are done every year on the NHS, with a 90 per cent success rate.", + "length": 101 + }, + { + "text": "That’s exactly why I kept schtum throughout my childhood and hated being picked out for special attention.", + "length": 108 + }, + { + "text": "Even now — as a grown woman of 62 — I only have to get slightly anxious to flush a bright shade of fuchsia.", + "length": 111 + }, + { + "text": "For as long as Linda Kelsey can remember she has consistently blushed 
bright pink at the slightest provocation.", + "length": 111 + }, + { + "text": "I know that when my partner grabs me for a kiss in the park, I will blush and pull away, even though I’m thrilled.", + "length": 116 + }, + { + "text": "‘It kicks off an immediate physical reaction with a heightened state of arousal or adrenaline rush,’ he explains.", + "length": 117 + }, + { + "text": "That perhaps explains why no woman’s make-up is ever complete without a dab of blusher to give her a slight pink glow.", + "length": 120 + }, + { + "text": "But one of the main risks is compensatory excess sweating, which can be as difficult to deal with as the initial problem.", + "length": 121 + }, + { + "text": "Professor Bor says the secret is to force yourself to cope in embarrassing situations and that’s how I’ve managed to get by.", + "length": 128 + }, + { + "text": "So I accept invitations to public-speaking engagements and am interviewed on radio and TV — even though I really don’t want to.", + "length": 131 + }, + { + "text": "‘If your parents are shy you may inherit that tendency, or perhaps you just pick up on their way of behaving,’ says Professor Bor.", + "length": 134 + }, + { + "text": "And that when I walk through the Nothing To Declare corridor in customs, I will blush, even though I really do have nothing to declare.", + "length": 135 + }, + { + "text": "While most little girls love pink, pink was Linda's least favourite colour growing up because it reminded her of the colour of my face .", + "length": 136 + }, + { + "text": "If anyone makes a flattering remark, I blush; if they make an unflattering one, I blush, and if I do something silly, of course, I blush.", + "length": 137 + }, + { + "text": "Despite me having unwashed hair and no make-up, he recognised me and announced cheerily: ‘I was hopelessly in love with you when I was 14.", + "length": 140 + }, + { + "text": "According to a recent study, we embarrass ourselves as many as seven times per week and women 
feel embarrassed three times more often than men.", + "length": 143 + }, + { + "text": "That when I am about to introduce someone and have forgotten their name — which happens all the time — I will blush like an awkward teenager.", + "length": 145 + }, + { + "text": "There’s also hope in the form of an operation, endoscopic transthoracic sympathectomy, which destroys the nerve responsible for facial blushing.", + "length": 146 + }, + { + "text": "While most little girls love pink, pink was my least favourite colour growing up because it reminded me of the colour of my face — and still does.", + "length": 148 + }, + { + "text": "In the same way, an easily embarrassed adult might be enjoying their birthday party only to feel horribly exposed when everyone sings Happy Birthday.", + "length": 149 + }, + { + "text": "‘But this could be because women are more willing to admit they have negative feelings and find it easier to talk about their problems than men do.", + "length": 149 + }, + { + "text": "For those of us who despair of our constant blushing, there is some comfort to be gained from recent research that proves embarrassment can be a good thing.", + "length": 156 + }, + { + "text": "Most people eventually stop going red when they get older, mainly because their confidence grows and they learn how to deal with embarrassing situations better.", + "length": 160 + }, + { + "text": "uses a green-tinged face cream under foundation to counteract redness, but would only consider surgery if blushing was preventing her from getting on with her life .", + "length": 165 + }, + { + "text": "According to Professor Bor, modern psychological methods for dealing with social anxiety and embarrassment offer some of the most successful and rapid treatments of all.", + "length": 169 + }, + { + "text": "‘I just don’t know where they’ve got to,’ I moaned, only to have my partner point out that not only was I wearing them but I had another pair perched on my head!", + "length": 169 + 
}, + { + "text": "While I often use a green-tinged face cream under foundation to counteract redness, I’d only consider surgery if blushing was preventing me from getting on with my life.", + "length": 171 + }, + { + "text": "Some 200 operations to cure blushing are done every year on the NHS (stock picture) So how do you overcome your propensity to turn pink if you don’t want to resort to surgery?", + "length": 177 + }, + { + "text": "When I look at old photos of my mother she’s always posing with her head slightly at an angle and eyes slightly away from the direct gaze of the camera, as if mildly embarrassed.", + "length": 180 + }, + { + "text": "Professor Bor gives the example of a bright student who knows the answer to a question asked by a teacher in class but won’t put up his or her hand because then everyone else will look at them.", + "length": 195 + }, + { + "text": "’ Not everyone who feels embarrassment blushes, but chronic blushers sometimes become so fearful of blushing that they begin to lead a hermit-like existence, avoiding social situations entirely.", + "length": 196 + }, + { + "text": "A few sessions of Cognitive Behavioural Therapy (CBT), a therapy based on the idea that unhelpful and unrealistic thinking leads to negative behaviour, can give a big confidence boost — and blushing the boot.", + "length": 210 + }, + { + "text": "Studies from the University of California, Berkeley, have shown that people who display embarrassment when describing embarrassing moments are regarded as more co-operative and trustworthy than those who show pride.", + "length": 215 + }, + { + "text": "According to clinical psychologist Professor Robert Bor, author of Coping Successfully With Shyness, we get embarrassed because of a sense of either actual or impending shame, as well as a fear of unwarranted attention.", + "length": 219 + }, + { + "text": "Although the survey suggests women feel embarrassed three times as often as men, Professor Robert Edelmann, author of 
Coping With Blushing, thinks it would be wrong to conclude that women suffer embarrassment more than men.", + "length": 223 + }, + { + "text": "’ Just why some people are more easily embarrassed than others may be partly genetic and partly learned (stock picture) Just why some people are more easily embarrassed than others may be partly genetic and partly learned.", + "length": 224 + }, + { + "text": "Not everyone who is shy needs professional help, but if embarrassment is inhibiting you from getting on with your life —from forming relationships, progressing in your career or simply from going out and having fun — help is available.", + "length": 239 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5453664064407349 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:16.955188611Z", + "first_section_created": "2025-12-23T09:32:16.955546225Z", + "last_section_published": "2025-12-23T09:32:16.955827537Z", + "all_results_received": "2025-12-23T09:32:17.051642843Z", + "output_generated": "2025-12-23T09:32:17.051888353Z", + "total_processing_time_ms": 96, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 95, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:16.955546225Z", + "publish_time": "2025-12-23T09:32:16.955766834Z", + "first_worker_start": "2025-12-23T09:32:16.956305356Z", + "last_worker_end": "2025-12-23T09:32:17.030245Z", + "total_journey_time_ms": 74, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:16.956438262Z", + "start_time": "2025-12-23T09:32:16.956557766Z", + "end_time": "2025-12-23T09:32:16.956678071Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:16.9566Z", + "start_time": "2025-12-23T09:32:16.956727Z", + "end_time": "2025-12-23T09:32:17.030245Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 73 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:16.95641306Z", + "start_time": "2025-12-23T09:32:16.956594268Z", + "end_time": "2025-12-23T09:32:16.956736474Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:16.956235253Z", + "start_time": "2025-12-23T09:32:16.956305356Z", + "end_time": "2025-12-23T09:32:16.956414761Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:16.955793535Z", + "publish_time": "2025-12-23T09:32:16.955827537Z", + "first_worker_start": "2025-12-23T09:32:16.956516265Z", + "last_worker_end": "2025-12-23T09:32:17.05071Z", + "total_journey_time_ms": 94, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:16.956476763Z", + "start_time": "2025-12-23T09:32:16.956516265Z", + "end_time": "2025-12-23T09:32:16.956569667Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:16.956809Z", + "start_time": "2025-12-23T09:32:16.956932Z", + "end_time": "2025-12-23T09:32:17.05071Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 93 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:16.956544466Z", + "start_time": "2025-12-23T09:32:16.956605668Z", + "end_time": "2025-12-23T09:32:16.956723573Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:16.956561167Z", + "start_time": "2025-12-23T09:32:16.956608968Z", + "end_time": "2025-12-23T09:32:16.95664557Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 166, + "min_processing_ms": 73, + "max_processing_ms": 93, + "avg_processing_ms": 83, + "median_processing_ms": 93, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3737, + "slowest_section_id": 1, + "slowest_section_time_ms": 94 + } +} diff --git a/data/output/000940f2bb357ac04a236a232156d8b9b18d1667.json b/data/output/000940f2bb357ac04a236a232156d8b9b18d1667.json new file mode 100644 index 0000000..31a198e --- /dev/null +++ b/data/output/000940f2bb357ac04a236a232156d8b9b18d1667.json @@ -0,0 +1,250 @@ +{ + "file_name": "000940f2bb357ac04a236a232156d8b9b18d1667.txt", + "total_words": 457, + "top_n_words": [ + { + "word": "kasem", + "count": 21 + }, + { + "word": "the", + "count": 16 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "and", + "count": 14 + }, + { + "word": "his", + "count": 10 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "s", + "count": 8 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "on", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "\"Mr.", + "length": 4 + }, + { + "text": "\"Any further updates on Mr.", + "length": 27 + }, + { + "text": "CNN's 
Jane Caffrey contributed to this report.", + "length": 46 + }, + { + "text": "Kasem's daughter wins additional powers in court .", + "length": 50 + }, + { + "text": "Anthony Hospital told CNN in a written statement Thursday.", + "length": 58 + }, + { + "text": "Kasem's condition will be at the discretion and approval of his children,\" Thompson said.", + "length": 89 + }, + { + "text": "\" A patient can be listed as both critical and stable, if his condition is poor but not deteriorating further.", + "length": 110 + }, + { + "text": "Deraney clarified that he never meant that Kasem's health was failing as was reported by several media outlets.", + "length": 111 + }, + { + "text": "On Thursday, a message appeared on the Twitter account of Deraney PR, saying that Kasem was in \"stable condition.", + "length": 113 + }, + { + "text": "When asked if they feared Casey Kasem might die, Deraney said it could be his last moments or he could get better.", + "length": 114 + }, + { + "text": "Casey Kasem, who was also the voice of Shaggy in the cartoon \"Scooby-Doo\" and an announcer for NBC, retired in 2009.", + "length": 116 + }, + { + "text": "On Friday afternoon, a Kitsap County judge will continue the hearing and get an update on the situation, Deraney said.", + "length": 118 + }, + { + "text": "Kasem is alert and appears comfortable at this time,\" Scott Thompson, a spokesman for the facility in Gig Harbor added.", + "length": 119 + }, + { + "text": "(CNN) -- Renowned radio personality Casey Kasem is in critical condition at a hospital in western Washington, a spokesman for St.", + "length": 129 + }, + { + "text": "Kasem has Lewy body disease, the most common type of progressive dementia after Alzheimer's, and has been bed-ridden for some time.", + "length": 131 + }, + { + "text": "Last week, a Washington court granted Kerri Kasem the right to visit her father one hour a day and to have him examined by a doctor.", + "length": 132 + }, + { + "text": "The radio icon has 
been at the center of a family feud between Jean Kasem, his wife of 34 years, and his three children from his first marriage.", + "length": 144 + }, + { + "text": "Deraney expected the judge to rule on whether Jean Kasem would have to let Kerri Kasem have more input on where her father lives and who cares for him.", + "length": 151 + }, + { + "text": "Danny Deraney, a representative of daughter Kerri Kasem, told CNN that members of the family, including Casey Kasem's brother, were flying to Washington.", + "length": 153 + }, + { + "text": "Kasem was admitted to the hospital Sunday after one of his daughters and an ambulance crew retrieved him from a home where he and his wife were staying with friends.", + "length": 165 + }, + { + "text": "Kerri Kasem; her sister, Julie; and their brother, Mike Kasem, have contended since last year that Jean Kasem has prevented the three siblings from visiting their father.", + "length": 170 + }, + { + "text": "The 82-year-old former host of \"American Top 40\" and \"Casey's Top 40\" is receiving antibiotics through IVs, blood pressure support medicine and care for his bed sores, Thompson said.", + "length": 182 + }, + { + "text": "He had been at a facility in Santa Monica, California, before Jean Kasem took him to Washington state after his daughter Kerri Kasem won temporary conservatorship over her father's care.", + "length": 186 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4874524474143982 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:17.456590455Z", + "first_section_created": "2025-12-23T09:32:17.456981871Z", + "last_section_published": "2025-12-23T09:32:17.457149678Z", + "all_results_received": "2025-12-23T09:32:17.521879817Z", + "output_generated": "2025-12-23T09:32:17.522077725Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, 
+ "creation_time": "2025-12-23T09:32:17.456981871Z", + "publish_time": "2025-12-23T09:32:17.457149678Z", + "first_worker_start": "2025-12-23T09:32:17.457709701Z", + "last_worker_end": "2025-12-23T09:32:17.520968Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:17.457707301Z", + "start_time": "2025-12-23T09:32:17.457790204Z", + "end_time": "2025-12-23T09:32:17.457847106Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:17.457899Z", + "start_time": "2025-12-23T09:32:17.458048Z", + "end_time": "2025-12-23T09:32:17.520968Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:17.457638498Z", + "start_time": "2025-12-23T09:32:17.457743202Z", + "end_time": "2025-12-23T09:32:17.457853607Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:17.457625097Z", + "start_time": "2025-12-23T09:32:17.457709701Z", + "end_time": "2025-12-23T09:32:17.457755603Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2639, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/00094243aed3af8b45dca389eeb46fa3e5c30027.json b/data/output/00094243aed3af8b45dca389eeb46fa3e5c30027.json new file mode 100644 index 0000000..1c453a3 --- /dev/null +++ b/data/output/00094243aed3af8b45dca389eeb46fa3e5c30027.json @@ -0,0 +1,262 @@ +{ + "file_name": "00094243aed3af8b45dca389eeb46fa3e5c30027.txt", + "total_words": 561, + "top_n_words": [ + { + "word": "the", + "count": 27 + }, + { + "word": "i", + "count": 22 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "she", + "count": 13 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "was", + "count": 12 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "it", + "count": 11 + }, + { + "word": "that", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "'I'm not against trapping per se.", + "length": 33 + }, + { + "text": "' 'I wanted to go back and tell the Raptor Center where it was.", + "length": 63 + }, + { + "text": "The eagle was found and euthanized three days after she freed it.", + "length": 65 + }, + { + "text": "She spent an hour freeing the creature before alerting a bird rescue firm.", + "length": 74 + }, + { + "text": "I knew if I left it there all night, it would have had a worse chance of surviving.", + "length": 83 + }, + { + "text": "On Thursday, the case was dismissed by a judge who called Adair's work 'admirable'.", + "length": 83 + }, + { + "text": "'She should not have to run the risk of a conviction on 
her record for this offense.", + "length": 84 + }, + { + "text": "I only object when the traps are on the trail where I think they are safety concerns.", + "length": 85 + }, + { + "text": "' 'I grew up hunting and fishing here, I've got several animal skins on my walls,' she said.", + "length": 92 + }, + { + "text": "I knew that would be the best thing to do, but I also knew that it would be getting dark soon.", + "length": 94 + }, + { + "text": "' Speaking to KTOO, she said: 'I knew at the time that the eagle didn't have a very good chance.", + "length": 96 + }, + { + "text": "I am concerned about the traps when they're on the trail in such a way as these were,' Adair said.", + "length": 98 + }, + { + "text": "Saved: The Bald Eagle caught in a leg-hold trap in Juneau on Christmas Eve, found and released by Adair .", + "length": 105 + }, + { + "text": "'What we expect from the public is if they come upon an eagle in a trap, to notify us as soon as possible.", + "length": 106 + }, + { + "text": "Arrested: Kathleen Adair, 39, was charged with hindering lawful trapping after snaring three traps in Alaska .", + "length": 110 + }, + { + "text": "Eventually tracked down by authorities she was charged and hauled to court facing a $500 fine and 30 days in jail.", + "length": 114 + }, + { + "text": "That way we can go out there and see what's going on,' Alaska Wildlife Trooper Sgt Aaron Frenzel told the station.", + "length": 114 + }, + { + "text": "Defending her actions, Adair told the Juneau Empire she is not 'an ecoterrorist trying to ruin trappers' livelihood.", + "length": 117 + }, + { + "text": "'I don't personally trap, and I don't choose to, I don't want to, but I'm not going to stop someone else from doing it.", + "length": 119 + }, + { + "text": "'Her actions in saving the eagle were laudable,' Juneau District Attorney James Scott said during Adair's arraignment on Thursday afternoon.", + "length": 140 + }, + { + "text": "A hiker was arrested and warned she could 
face jail after freeing an eagle from a trap and springing three more traps to protect other animals.", + "length": 143 + }, + { + "text": "'It was two miles from the road and it was all the way at the end of the road, so I knew that they wouldn't be able to get out there that day to it.", + "length": 148 + }, + { + "text": "Kathleen Adair, 39, was walking her three dogs up Davies Creek Trail in Alaska on Christmas Eve when she spotted the bird with each leg shut inside traps.", + "length": 154 + }, + { + "text": "Heading home, she also sprung another trap which she spotted in the ground - prompting an investigation by Alaska Wildlife Troopers that landed her in court.", + "length": 157 + }, + { + "text": "' 'When she's hiking and she comes across an eagle in a snare, I encourage her to rescue that eagle again, and I will screen that case out as well,' the district attorney added, according to the Empire.", + "length": 202 + }, + { + "text": "'But even as it was, I could tell one of the legs was just dangling, just completely broken and I knew they wouldn't be able to fix that, but I was hoping they could at least fix the other and keep it as an educational bird.", + "length": 224 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.42224550247192383 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:17.957808692Z", + "first_section_created": "2025-12-23T09:32:17.958174907Z", + "last_section_published": "2025-12-23T09:32:17.958343614Z", + "all_results_received": "2025-12-23T09:32:18.021096573Z", + "output_generated": "2025-12-23T09:32:18.021285881Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:17.958174907Z", + "publish_time": "2025-12-23T09:32:17.958343614Z", + "first_worker_start": "2025-12-23T09:32:17.959023042Z", + "last_worker_end": 
"2025-12-23T09:32:18.020218Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:17.958960339Z", + "start_time": "2025-12-23T09:32:17.959023042Z", + "end_time": "2025-12-23T09:32:17.959085544Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:17.959226Z", + "start_time": "2025-12-23T09:32:17.959382Z", + "end_time": "2025-12-23T09:32:18.020218Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:17.95898664Z", + "start_time": "2025-12-23T09:32:17.959055443Z", + "end_time": "2025-12-23T09:32:17.959149847Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:17.95897534Z", + "start_time": "2025-12-23T09:32:17.959043443Z", + "end_time": "2025-12-23T09:32:17.959090345Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2924, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/00095b6362f23a733fb1a4224600023c6584f156.json b/data/output/00095b6362f23a733fb1a4224600023c6584f156.json new file mode 100644 index 0000000..22e3aa9 --- /dev/null +++ b/data/output/00095b6362f23a733fb1a4224600023c6584f156.json @@ -0,0 +1,322 @@ +{ + "file_name": "00095b6362f23a733fb1a4224600023c6584f156.txt", + "total_words": 724, + "top_n_words": [ + { + "word": "the", + "count": 49 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "it", + "count": 11 + }, + { + "word": "are", + "count": 9 + }, + { + "word": "at", + "count": 9 + }, + { + "word": "s", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Ian Drury .", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Morale is really low.", + "length": 21 + }, + { + "text": "19:24 EST, 31 July 2012 .", + "length": 25 + }, + { + "text": "02:18 EST, 1 August 2012 .", + "length": 26 + }, + { + "text": "'They were treated better in the desert.", + "length": 40 + }, + { + "text": "‘It really does stink in there when it’s hot.", + "length": 49 + }, + { + "text": "If the RSPCA found dogs being kept like that they’d take them away.", + "length": 69 + }, + { + "text": "This picture shows the sleeping quarters at the temporary Army barracks .", + "length": 73 + }, + { + "text": "Furious: One soldier's mother said the billets were 'worse than Afghanistan'.", + 
"length": 77 + }, + { + "text": "In a grimy underground car park, row upon row of camp beds are crammed together.", + "length": 80 + }, + { + "text": "One soldier’s furious mother said the billets were ‘worse than Afghanistan’.", + "length": 82 + }, + { + "text": "The Ministry of Defence said it was 'working hard' to make accommodation comfortable .", + "length": 86 + }, + { + "text": "The Ministry of Defence said it was ‘working hard’ to make accommodation comfortable.", + "length": 89 + }, + { + "text": "‘I understand they have got some free tickets but that’s only because the seats are empty.", + "length": 94 + }, + { + "text": "They’re bringing more and more men in and just lining up rows of camp beds anywhere they can find.", + "length": 100 + }, + { + "text": "‘We’re proud to be part of the Olympics but the conditions we are being asked to live in are grim.", + "length": 102 + }, + { + "text": "If it was a contingency plan you have got to ask what plans did they make to accommodate the soldiers?", + "length": 102 + }, + { + "text": "’ A soldier, who asked not to be identified, told the Mail: ‘Everyone’s feeling really miserable about it.", + "length": 112 + }, + { + "text": "Horrific: The stench from a row of overflowing portable lavatories wafts through Tobacco Dock, near Tower Bridge .", + "length": 114 + }, + { + "text": "These are the spartan sleeping conditions facing many of the British heroes helping provide security at the Olympics.", + "length": 117 + }, + { + "text": "Poor: These are the spartan sleeping conditions facing many of the British heroes helping provide security at the Olympics .", + "length": 124 + }, + { + "text": "The living conditions outraged relatives of personnel from the 1st Battalion The Princess of Wales’s Royal Regiment (1PWRR).", + "length": 126 + }, + { + "text": "Local businesses and shops have stepped in to top up the soldiers’ Army rations – offering half-price pizzas and fast food.", + "length": 127 + }, + { + "text": 
"Help: Local businesses and shops have stepped in to top up the soldiers' Army rations - offering half-price pizzas and fast food.", + "length": 129 + }, + { + "text": "Labour MP John Denham said: ‘When the troops were put on standby, we were told that this was part of a contingency plan being put in place.", + "length": 141 + }, + { + "text": "’ One military source said: ‘It’s nightmarishly hot in some of the rooms because they heat up all day and then are crammed full of the lads.", + "length": 146 + }, + { + "text": "Fiona Mason, 51, from Fair Oak, Hampshire, whose son Paul, 21, serves with 1PWRR – known as the Tigers – said: ‘It’s absolutely disgusting.", + "length": 147 + }, + { + "text": "More than 2,000 soldiers guarding London venues are forced to share a sweltering, cramped and poorly-lit concrete bunker at a disused shopping centre.", + "length": 150 + }, + { + "text": "‘It compares very positively with the type of living arrangements soldiers will have experienced on overseas operations and on regular military exercises in the UK.", + "length": 166 + }, + { + "text": "’ Around 18,000 troops were drafted in at the last minute to provide security at the Games after G4S admitted it did not have the manpower to fulfil its Olympic contract.", + "length": 172 + }, + { + "text": "’ An MoD spokesman said: ‘The specific area shown in the photograph is underground; it is dry, lit, equipped with ablution facilities and has power and WiFi capability.", + "length": 172 + }, + { + "text": "Grim: More than 2,000 soldiers guarding London venues are forced to share a sweltering, cramped and poorly-lit concrete bunker at a disused shopping centre at Tobacco Dock .", + "length": 173 + }, + { + "text": "Many of the uniformed troops had their annual leave cancelled after returning home from six gruelling months on the front line in Afghanistan to cover for the troubled private security firm G4S.", + "length": 194 + }, + { + "text": "Heroes: Many of the uniformed troops had 
their annual leave cancelled after returning home from six gruelling months on the front line in Afghanistan to cover for the troubled private security firm G4S .", + "length": 203 + }, + { + "text": "’ Simon Lynch-Garbett, 56, from Tenerife, whose son James, 28, was deployed to the Olympics, said: ‘They’ve spent months in Afghanistan fighting for our country and this is the way they’re being treated.", + "length": 211 + }, + { + "text": "The stench from a row of overflowing portable lavatories wafts through Tobacco Dock, near Tower Bridge, which has green military cots spread along nearly every corridor as every available piece of space is used.", + "length": 211 + }, + { + "text": "Not happy: The living conditions outraged relatives of personnel from the 1st Battalion The Princess of Wales¿s Royal Regiment (1PWRR) Making a home: A soldier prepares his bed in the sleeping quarters at Tobacco Dock .", + "length": 220 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.857502281665802 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:18.459116833Z", + "first_section_created": "2025-12-23T09:32:18.459504449Z", + "last_section_published": "2025-12-23T09:32:18.459672456Z", + "all_results_received": "2025-12-23T09:32:18.529158789Z", + "output_generated": "2025-12-23T09:32:18.529341797Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:18.459504449Z", + "publish_time": "2025-12-23T09:32:18.459672456Z", + "first_worker_start": "2025-12-23T09:32:18.460380585Z", + "last_worker_end": "2025-12-23T09:32:18.528282Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:18.460326883Z", + "start_time": "2025-12-23T09:32:18.460380585Z", + "end_time": 
"2025-12-23T09:32:18.460456288Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:18.46058Z", + "start_time": "2025-12-23T09:32:18.460725Z", + "end_time": "2025-12-23T09:32:18.528282Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:18.460334583Z", + "start_time": "2025-12-23T09:32:18.460405486Z", + "end_time": "2025-12-23T09:32:18.46051579Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:18.460330083Z", + "start_time": "2025-12-23T09:32:18.460386185Z", + "end_time": "2025-12-23T09:32:18.460441487Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4348, + 
"slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/00096a435fc5eb6634d7c187c49257f975df5b9d.json b/data/output/00096a435fc5eb6634d7c187c49257f975df5b9d.json new file mode 100644 index 0000000..cf833b0 --- /dev/null +++ b/data/output/00096a435fc5eb6634d7c187c49257f975df5b9d.json @@ -0,0 +1,428 @@ +{ + "file_name": "00096a435fc5eb6634d7c187c49257f975df5b9d.txt", + "total_words": 1078, + "top_n_words": [ + { + "word": "to", + "count": 42 + }, + { + "word": "the", + "count": 37 + }, + { + "word": "and", + "count": 34 + }, + { + "word": "he", + "count": 30 + }, + { + "word": "his", + "count": 25 + }, + { + "word": "was", + "count": 25 + }, + { + "word": "of", + "count": 23 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "that", + "count": 17 + }, + { + "word": "joshua", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "30pm.", + "length": 5 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "10:33 EST, 2 October 2012 .", + "length": 27 + }, + { + "text": "13:48 EST, 3 October 2012 .", + "length": 27 + }, + { + "text": "'They need to educate their children as well.", + "length": 45 + }, + { + "text": "He added: 'Better education is hugely important.", + "length": 48 + }, + { + "text": "Teachers and parents should be given more advice.", + "length": 49 + }, + { + "text": "He was loved by so many people and had a lot of friends.", + "length": 56 + }, + { + "text": "' Sorry we are unable to accept comments for legal reasons.", + "length": 59 + }, + { + "text": "'Josh lived life to the full and his illness never stopped him.", + "length": 63 + }, + { + "text": "He was rushed to Royal Oldham Hospital but was pronounced dead at 8.", + "length": 68 + }, + { + "text": "I was particularly impressed with how he tidied up 
around the house.", + "length": 68 + }, + { + "text": "Probably while everyone was fussing he was fussing less than anyone.", + "length": 68 + }, + { + "text": "He died later in hospital despite the efforts of doctors to save him.", + "length": 69 + }, + { + "text": "'Joshua was into everything and yet there was also a very tender side to him.", + "length": 77 + }, + { + "text": "Joshua Platt, nine, refused to let his asthma from stopping him enjoying life .", + "length": 79 + }, + { + "text": "' Joshua was taking puffs from his inhaler but they didn't make any difference.", + "length": 79 + }, + { + "text": "It doesn't surprise me to hear that Joshua was very laid back about his condition.", + "length": 82 + }, + { + "text": "Britain's five-million-plus asthmatics are normally issued with two types of inhaler.", + "length": 85 + }, + { + "text": "It doesn't surprise me at all to hear that Joshua was very laid back about his illness.", + "length": 87 + }, + { + "text": "'Everyone else was reminding him to take it but he was not aware of how important it was.", + "length": 89 + }, + { + "text": "to assess children each time to monitor their condition and compliance with the medication.", + "length": 91 + }, + { + "text": "He helped Joanna tidy up before going to his grandmother's house for the afternoon of February 12.", + "length": 98 + }, + { + "text": "'Everything I've heard about him is positive and that is a wonderful trait to have at the age of nine.", + "length": 102 + }, + { + "text": "Today his mother Joanna called for more awareness of asthma following an inquest into her son's death.", + "length": 102 + }, + { + "text": "' Joshua was rushed to Royal Oldham Hospital in February after having a severe asthma attack, but died .", + "length": 104 + }, + { + "text": "He was panicking and I was trying to guide him and try to get him to inhale slowly but he just couldn't.", + "length": 104 + }, + { + "text": "I think children who have asthma need to be better 
educated about the importance of all their medication.", + "length": 105 + }, + { + "text": "Tragedy struck after Joshua returned home from his visiting his grandmother and he suffered a major attack.", + "length": 107 + }, + { + "text": "He became unconscious and his mum dialled 999 and tried to resuscitate him while waiting for the ambulance.", + "length": 107 + }, + { + "text": "Brown inhalers (left) are used for preventing asthma attacks, while blue inhalers are used during an episode .", + "length": 110 + }, + { + "text": "'I think at that age you feel that you are invincible - a bit like the superheroes that you look at in comics.", + "length": 110 + }, + { + "text": "He tried to use an inhaler he was prescribed with to relieve the symptoms but his airways were already blocked.", + "length": 111 + }, + { + "text": "'Parents need to be made aware that whether you have got the asthma in control or not that this can still happen.", + "length": 113 + }, + { + "text": "The second type, which is often blue, is filled with a drug that rapidly opens up the airways during an asthma attack.", + "length": 118 + }, + { + "text": "Joanna said: 'He was trying to take deep breaths while trying to talk to me at the same time but was struggling to do both.", + "length": 123 + }, + { + "text": "On the day Joshua died, he woke up with a very mild cough and a \"bit of a sniffle\" but nothing his mother was concerned about.", + "length": 126 + }, + { + "text": "If he ever felt out of breath he would use his inhaler and carry on and he would often accidentally leave it at his primary school.", + "length": 131 + }, + { + "text": "A lot of people are so laid back about it and think that you can just rely on your blue inhaler but once those tubes close that's it.", + "length": 133 + }, + { + "text": "Consultant paediatrician Dr Abdul Rehman said a meeting between health professionals and police had been held following Joshua's death.", + "length": 135 + }, + { + "text": "A 'dynamo' 
boy of nine died from a severe asthma attack after he refused to let the condition spoil his love of playing with friends and family.", + "length": 144 + }, + { + "text": "'To other parents with asthmatic children all I can say is don't take the inhalers for granted and keep on your child's back about taking the brown preventer.", + "length": 158 + }, + { + "text": "It emerged the youngster had become too reliant on his blue inhaler and didn't think he had to use a brown inhaler which prevented asthma attacks taking place.", + "length": 159 + }, + { + "text": "A post-mortem examination confirmed that Joshua died of an asthma attack and his family were told there was nothing they could have done to prevent the tragedy.", + "length": 160 + }, + { + "text": "Joanna said he had experienced worse symptoms in the past but he asked for his inhaler and started rocking himself backwards and forwards and asked for his back to be rubbed.", + "length": 174 + }, + { + "text": "He returned to his home in the Hollinwood area of Oldham at around 7pm and his mum gave him some Calpol children's medicine before he sat on the sofa and started to wheeze slightly.", + "length": 181 + }, + { + "text": "Joshua Platt had been diagnosed with asthma at the age of one but whenever he got out of breath during football matches or other games he would take his inhaler and then carry on playing.", + "length": 187 + }, + { + "text": "The inquest was told that although sudden severe attacks were common in those who did not manage their condition, the same could also happen to sufferers who think their illness is under control.", + "length": 195 + }, + { + "text": "Recording a verdict of death by natural causes coroner Simon Nelson urged teachers, parents and GPs across the borough to ensure sufferers understand the importance of their preventative medication.", + "length": 198 + }, + { + "text": "The group agreed that as a result Joshua's GP, based at the Hollinwood Practice in Oldham, would change 
procedures - offering more education to the families of asthmatics on how to manage the condition and risks involved.", + "length": 221 + }, + { + "text": "The brown model contains a low dose of steroids and is prescribed to stop attacks from starting and it is thought that it works best if taken regularly to build up resistance in the lungs, with many people using it daily.", + "length": 221 + }, + { + "text": "The Oldham hearing was told Joshua who was prescribed with both inhalers led an active life taking part in camping, swimming, football, basketball, judo and playing with his younger sisters Demi-Rae, four, and two-year-old Macie.", + "length": 229 + }, + { + "text": "Mother-of-three Joanna said: 'I think Josh had become laid back about his medication and thought that because he had his inhaler to use if he had an attack, he didn't need to worry about the other inhaler that would prevent it in the first place.", + "length": 246 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6471637189388275 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:18.960420723Z", + "first_section_created": "2025-12-23T09:32:18.960796138Z", + "last_section_published": "2025-12-23T09:32:18.961137552Z", + "all_results_received": "2025-12-23T09:32:19.060990566Z", + "output_generated": "2025-12-23T09:32:19.06160339Z", + "total_processing_time_ms": 101, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 99, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:18.960796138Z", + "publish_time": "2025-12-23T09:32:18.961022247Z", + "first_worker_start": "2025-12-23T09:32:18.96157997Z", + "last_worker_end": "2025-12-23T09:32:19.039134Z", + "total_journey_time_ms": 78, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:18.961492966Z", + "start_time": "2025-12-23T09:32:18.96157997Z", + "end_time": 
"2025-12-23T09:32:18.961706975Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:18.961817Z", + "start_time": "2025-12-23T09:32:18.961957Z", + "end_time": "2025-12-23T09:32:19.039134Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 77 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:18.961514267Z", + "start_time": "2025-12-23T09:32:18.96158527Z", + "end_time": "2025-12-23T09:32:18.961726576Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:18.961527268Z", + "start_time": "2025-12-23T09:32:18.96158797Z", + "end_time": "2025-12-23T09:32:18.961704175Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:18.961053149Z", + "publish_time": "2025-12-23T09:32:18.961137552Z", + "first_worker_start": "2025-12-23T09:32:18.961563069Z", + "last_worker_end": "2025-12-23T09:32:19.060137Z", + "total_journey_time_ms": 99, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:18.96157977Z", + "start_time": "2025-12-23T09:32:18.961617671Z", + "end_time": "2025-12-23T09:32:18.961642772Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:18.961817Z", + "start_time": "2025-12-23T09:32:18.96195Z", + "end_time": "2025-12-23T09:32:19.060137Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 98 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:18.961555669Z", + "start_time": "2025-12-23T09:32:18.961600271Z", + "end_time": "2025-12-23T09:32:18.961639672Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:18.961512267Z", + 
"start_time": "2025-12-23T09:32:18.961563069Z", + "end_time": "2025-12-23T09:32:18.96158247Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 175, + "min_processing_ms": 77, + "max_processing_ms": 98, + "avg_processing_ms": 87, + "median_processing_ms": 98, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2999, + "slowest_section_id": 1, + "slowest_section_time_ms": 99 + } +} diff --git a/data/output/000994296459fb84cf26cf71a0c109c192758c11.json b/data/output/000994296459fb84cf26cf71a0c109c192758c11.json new file mode 100644 index 0000000..2a1a3de --- /dev/null +++ b/data/output/000994296459fb84cf26cf71a0c109c192758c11.json @@ -0,0 +1,294 @@ +{ + "file_name": "000994296459fb84cf26cf71a0c109c192758c11.txt", + "total_words": 659, + "top_n_words": [ + { + "word": "the", + "count": 28 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "was", + "count": 19 + }, + { + "word": "a", + "count": 16 + }, + { + "word": "and", + "count": 14 + }, + { + "word": "her", + "count": 14 + }, + { + "word": "i", + 
"count": 10 + }, + { + "word": "it", + "count": 9 + }, + { + "word": "that", + "count": 9 + }, + { + "word": "an", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "The temperature was 6.", + "length": 22 + }, + { + "text": "‘Where do you draw the line?", + "length": 30 + }, + { + "text": "It simply didn’t merit the hassle.", + "length": 36 + }, + { + "text": "7C when she was interrogated last Friday.", + "length": 41 + }, + { + "text": "I wasn’t doing anything wrong, so I walked off.", + "length": 49 + }, + { + "text": "I told them that we came out to the prom all the time to play.", + "length": 62 + }, + { + "text": "‘I explained that she was fine and Maddy certainly looked fine.", + "length": 65 + }, + { + "text": "Do you police children wearing too many clothes on the beach in summer?", + "length": 71 + }, + { + "text": "I am not convinced police should be harassing mums playing with their babies.", + "length": 77 + }, + { + "text": "‘This is by far the barmiest cold case I have ever come across,’ he added.", + "length": 78 + }, + { + "text": "She had been out of the car for less than 10 minutes when the police arrived .", + "length": 78 + }, + { + "text": "Can people dial 999 if they suspect a parent hasn’t put enough sun cream on a youngster?", + "length": 90 + }, + { + "text": "‘I added that they’d be taking her to the social workers next if I gave them my details.", + "length": 92 + }, + { + "text": "The 43-year-old nutritionist said: ‘Maddy was covered up and wasn’t cold, that was clear.", + "length": 93 + }, + { + "text": "’ When one of the policemen asked Mrs Andrew her name, she responded: ‘This is ridiculous.", + "length": 94 + }, + { + "text": "The mother is clearly not committing a crime by taking her child for a walk along the promenade.", + "length": 96 + }, + { + "text": "‘Do officers now need a thermometer to take children’s temperatures along with their truncheons?", + "length": 100 + }, + { + "text": "Two policemen were immediately 
sent to Scarborough seafront to question her about Maddy’s welfare.", + "length": 100 + }, + { + "text": "They’d be better off policing McDonald’s and checking that children are not eating too much in there.", + "length": 105 + }, + { + "text": "But that did not stop a passer-by dialling 999 to report her mother Paula for exposing a child to the cold.", + "length": 107 + }, + { + "text": "’ But Stephen Hayes, an ex-policeman and writer, said the case typified what was wrong with modern policing.", + "length": 110 + }, + { + "text": "’ Mrs Andrew was on the promenade with Maddy while her financier husband Mike, 49, was surfing off the beach.", + "length": 111 + }, + { + "text": "After refusing to reveal who she was, Mrs Andrew, from Scarborough, was asked: ‘So you don’t want to co-operate?", + "length": 116 + }, + { + "text": "’ She said: ‘I told them that it was a mother’s right to play with her daughter and it wasn’t a co-operation thing.", + "length": 123 + }, + { + "text": "‘We’d not even been there ten minutes when the police turned up,’ said Mrs Andrew, who has two other children aged 12 and 14.", + "length": 131 + }, + { + "text": "Dressed in woolly tights, leggings, an all-in-one vest and fleecy top, nine-month-old Maddy Andrew seemed well wrapped up for winter.", + "length": 133 + }, + { + "text": "’ Mrs Andrew was with her daughter on the promenade, while her financier husband Mike, 49, was surfing off the beach at Scarborough.", + "length": 134 + }, + { + "text": "’ Mrs Andrew, who refused to give her name to the officers, said one had claimed it was their duty to ‘keep an eye on these things’.", + "length": 138 + }, + { + "text": "The nutritionist, 43, who has two older children aged 14 and 12, was told an anonymous 999 call had alleged her baby daughter looked cold .", + "length": 139 + }, + { + "text": "Paula Andrew was playing on the promenade with her nine-month-old daughter Maddy when a police van and two officers turned up and began to question her 
.", + "length": 153 + }, + { + "text": "‘They could see everything was OK, but they told me that they’d had an anonymous call saying there was a little girl playing on the promenade and she was cold.", + "length": 163 + }, + { + "text": "’ A North Yorkshire Police spokesman said yesterday : ‘All reports concerning the safety of children are taken very seriously by North Yorkshire Police and must be properly checked out.", + "length": 189 + }, + { + "text": "She added: ‘It can take more than an hour for police to turn up if I call in complaining about yobs at the back of my house and sometimes they don’t attend at all, but an anonymous call can send them racing to the promenade.", + "length": 228 + }, + { + "text": "‘In most areas it is easier to order a takeaway pizza and get it delivered than get a response to an emergency 999 call, so it is astonishing that, when there is a swift response, it is to a totally innocent lady playing with her baby.", + "length": 237 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.49926134943962097 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:19.46190568Z", + "first_section_created": "2025-12-23T09:32:19.462195291Z", + "last_section_published": "2025-12-23T09:32:19.4624041Z", + "all_results_received": "2025-12-23T09:32:19.529019577Z", + "output_generated": "2025-12-23T09:32:19.529217985Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:19.462195291Z", + "publish_time": "2025-12-23T09:32:19.4624041Z", + "first_worker_start": "2025-12-23T09:32:19.462952122Z", + "last_worker_end": "2025-12-23T09:32:19.528161Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:19.462890819Z", + "start_time": 
"2025-12-23T09:32:19.462952122Z", + "end_time": "2025-12-23T09:32:19.463008824Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:19.463119Z", + "start_time": "2025-12-23T09:32:19.463286Z", + "end_time": "2025-12-23T09:32:19.528161Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:19.46289662Z", + "start_time": "2025-12-23T09:32:19.462970523Z", + "end_time": "2025-12-23T09:32:19.463061926Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:19.462942022Z", + "start_time": "2025-12-23T09:32:19.463013424Z", + "end_time": "2025-12-23T09:32:19.463068327Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + 
"average_section_size": 3619, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/0009ebb1967511741629926ef9f5faea2bb6be24.json b/data/output/0009ebb1967511741629926ef9f5faea2bb6be24.json new file mode 100644 index 0000000..68491b6 --- /dev/null +++ b/data/output/0009ebb1967511741629926ef9f5faea2bb6be24.json @@ -0,0 +1,278 @@ +{ + "file_name": "0009ebb1967511741629926ef9f5faea2bb6be24.txt", + "total_words": 334, + "top_n_words": [ + { + "word": "the", + "count": 17 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "on", + "count": 11 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "a", + "count": 7 + }, + { + "word": "airlines", + "count": 7 + }, + { + "word": "time", + "count": 7 + }, + { + "word": "and", + "count": 6 + }, + { + "word": "in", + "count": 6 + }, + { + "word": "at", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "4%.", + "length": 3 + }, + { + "text": "24 per 100,000.", + "length": 15 + }, + { + "text": "At a time when U.", + "length": 17 + }, + { + "text": "8% on-time performance.", + "length": 23 + }, + { + "text": "Last year, it improved to 93.", + "length": 29 + }, + { + "text": "Department of Transportation figures.", + "length": 37 + }, + { + "text": "87 misplaced bags per 1,000 passengers.", + "length": 39 + }, + { + "text": "80 mishandled bags per 1,000 passengers.", + "length": 40 + }, + { + "text": "airlines and is based on an analysis of U.", + "length": 42 + }, + { + "text": "FAA delays closures of 149 control towers .", + "length": 43 + }, + { + "text": ") Seven of the world's most entertaining airports .", + "length": 51 + }, + { + "text": "Boeing does 'final' battery test on 787 Dreamliner .", + "length": 52 + }, + { + "text": "In fact, Hawaiian got even better from 2011, when it had a 92.", + "length": 62 + }, + { + "text": "The Airline Quality 
Rankings Report looks at the 14 largest U.", + "length": 62 + }, + { + "text": "Eight airlines improved their on-time arrival performance in 2012.", + "length": 66 + }, + { + "text": "ExpressJet and American Airlines had the worst on-time performance (76.", + "length": 71 + }, + { + "text": "Nine of the 14 rated had an on-time arrival percentage of more than 80%.", + "length": 72 + }, + { + "text": "9%) last year, according to the data gathered in the 23rd annual report.", + "length": 72 + }, + { + "text": "Virgin America had the best baggage handling rate of all the airlines (0.", + "length": 73 + }, + { + "text": "When it came to complaints last year, Southwest again had the lowest consumer rate (0.", + "length": 86 + }, + { + "text": ") American Eagle showed improvement from 2011 but still came in last, fumbling baggage at a rate of 5.", + "length": 102 + }, + { + "text": "(CNN) -- If you travel by plane and arriving on time makes a difference, try to book on Hawaiian Airlines.", + "length": 106 + }, + { + "text": "25 per 100,000 passengers) while the distinction of being the airline with the highest consumer complaint rate went to United Airlines (4.", + "length": 138 + }, + { + "text": "It's co-authored by Brent Bowen, the head of the Department of Aviation Technology at Purdue University, and Dean Headley of Wichita State.", + "length": 139 + }, + { + "text": "In 2012, passengers got where they needed to go without delay on the carrier more than nine times out of 10, according to a study released on Monday.", + "length": 149 + }, + { + "text": "airlines are a whipping post for passenger complaints about crowded flights, tight seats, costly tickets and unsatisfactory service, there is a glimmer of hope.", + "length": 160 + }, + { + "text": "In addition to on-time performance, the joint project looks at three other categories: rate of consumer complaints, mishandled bags and denied boarding performance.", + "length": 164 + } + ], + "sentiment": { + "sentiment": 
"neutral", + "score": 0.5774142742156982 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:19.963561543Z", + "first_section_created": "2025-12-23T09:32:19.965247311Z", + "last_section_published": "2025-12-23T09:32:19.965439519Z", + "all_results_received": "2025-12-23T09:32:20.029328587Z", + "output_generated": "2025-12-23T09:32:20.029466892Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:19.965247311Z", + "publish_time": "2025-12-23T09:32:19.965439519Z", + "first_worker_start": "2025-12-23T09:32:19.965893837Z", + "last_worker_end": "2025-12-23T09:32:20.028436Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:19.965904237Z", + "start_time": "2025-12-23T09:32:19.965949039Z", + "end_time": "2025-12-23T09:32:19.965995941Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:19.966179Z", + "start_time": "2025-12-23T09:32:19.966301Z", + "end_time": "2025-12-23T09:32:20.028436Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:19.965839935Z", + "start_time": "2025-12-23T09:32:19.965893837Z", + "end_time": "2025-12-23T09:32:19.965953939Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:19.965849035Z", + "start_time": "2025-12-23T09:32:19.965909038Z", + "end_time": "2025-12-23T09:32:19.965949739Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1936, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/0009f60bb58a7d93063d6f75edf05cbfc0874ece.json b/data/output/0009f60bb58a7d93063d6f75edf05cbfc0874ece.json new file mode 100644 index 0000000..95cc955 --- /dev/null +++ b/data/output/0009f60bb58a7d93063d6f75edf05cbfc0874ece.json @@ -0,0 +1,298 @@ +{ + "file_name": "0009f60bb58a7d93063d6f75edf05cbfc0874ece.txt", + "total_words": 687, + "top_n_words": [ + { + "word": "the", + "count": 30 + }, + { + "word": "to", + "count": 22 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "s", + "count": 14 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "we", + "count": 11 + }, + { + "word": "liverpool", + "count": 10 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "that", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "No.", + "length": 3 + }, + { + "text": "Hopefully we will.", + "length": 18 + }, + { + "text": "Are they a 
better team than us?", + "length": 31 + }, + { + "text": "‘We know we are not playing well.", + "length": 35 + }, + { + "text": "‘(Neville) has said it as a joke.", + "length": 35 + }, + { + "text": "So we’ve got to go there and do it.", + "length": 37 + }, + { + "text": "We’ve got full confidence in the manager.", + "length": 43 + }, + { + "text": "All week criticism has been raining down on them.", + "length": 49 + }, + { + "text": "’ Henderson has developed a strong relationship with Rodgers.", + "length": 63 + }, + { + "text": "We know we need to be doing better and I’m sure Manchester United feel the same.", + "length": 82 + }, + { + "text": "The Premier League table, showing Manchester United in third and Liverpool ninth .", + "length": 82 + }, + { + "text": "I see the quality and character that we’ve got at this club every day in training.", + "length": 84 + }, + { + "text": "So when the topic of criticism is raised with Jordan Henderson, there is a wry smile.", + "length": 85 + }, + { + "text": "That is why he does not shirk the issue as this particular conversation unfolds at Melwood.", + "length": 91 + }, + { + "text": "He doesn’t think either team is playing really well at the minute but that doesn’t bother us.", + "length": 97 + }, + { + "text": "Gerrard scored two penalties in Liverpool's comprehensive 3-0 win at their fierce rivals last season .", + "length": 102 + }, + { + "text": "Despite Gerrard's goal, Liverpool crashed out of the Champions League, continuing their poor campaign .", + "length": 103 + }, + { + "text": "’ Despite their poor form, Henderson is confident Brendan Rodgers (pictured) can guide them to a win .", + "length": 104 + }, + { + "text": "The Liverpool players were in high spirits as they trained ahead of their game against Manchester United .", + "length": 106 + }, + { + "text": "Jordan Henderson (left) wheels away in celebration with Steven Gerrard after the latter scored a free-kick .", + "length": 108 + }, + { + "text": 
"Gerrard has scored more goals as an away player at Old Trafford than anyone else in Premier League history .", + "length": 108 + }, + { + "text": "‘But it’s all well and good me sitting here and saying that but it’s up to us to go and do the business.", + "length": 110 + }, + { + "text": "From being likened to the Dog and Duck, to being told they are rudderless, to warnings about the manager’s job.", + "length": 113 + }, + { + "text": "‘I’m not saying Man U aren’t a good team because they are and they’ve got fantastic players but so have we.", + "length": 115 + }, + { + "text": "Liverpool’s vice-captain has not shut himself away from the disparaging comments but he didn’t need to hear them.", + "length": 117 + }, + { + "text": "‘The outside world might accept that (it’s all down to the manager) but as players we certainly don’t accept it.", + "length": 118 + }, + { + "text": "‘He’s still confident in the way he wants to play football and is doing everything he can for us to start getting results,’ he said.", + "length": 138 + }, + { + "text": "It is significant, then, to hear Henderson’s appraisal of Rodgers’ mood and methods following Tuesday’s disappointment against Basle.", + "length": 139 + }, + { + "text": "‘You’ve got to take (the Dog and Duck comment) as a bit tongue in cheek,’ said Henderson, gearing up for Sunday’s trip to Old Trafford.", + "length": 143 + }, + { + "text": "‘Although United have been picking up some good results of late and showing some great resilience, I don’t think they’ve played fantastically well.", + "length": 153 + }, + { + "text": "Aware that victory at Old Trafford would dramatically alter the mood around Anfield, Henderson is quick to think back to how it was in March when Liverpool made the short trip down the M62 and terrorised David Moyes’ team.", + "length": 224 + }, + { + "text": "The England midfielder refers to his manager as being ‘one of the best in the world’ and has made no secret of how big an influence he had in 
reviving a Liverpool career that, at one stage, never looked like coming into bloom.", + "length": 230 + }, + { + "text": "‘We’ve got enough quality and character in this group to win on Sunday and the next game as well and then build from that,’ said Henderson, who has put contract talks to the back of his mind until Liverpool’s results improve.", + "length": 233 + }, + { + "text": "The score that day was 3-0 but it could have been doubled such was Liverpool’s superiority; nine months on, Henderson does not accept United are now the better team, nor does he entertain the idea that Sunday will see Louis van Gaal’s side exact revenge.", + "length": 258 + }, + { + "text": "Tumbling out of the Champions League at such an early stage causes a maelstrom at Anfield and plenty — from Gary Neville to a raft of former Liverpool players — have had something to say about how poorly Brendan Rodgers and his side have performed this season.", + "length": 264 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.800535261631012 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:20.466187946Z", + "first_section_created": "2025-12-23T09:32:20.466583162Z", + "last_section_published": "2025-12-23T09:32:20.466833072Z", + "all_results_received": "2025-12-23T09:32:20.531843085Z", + "output_generated": "2025-12-23T09:32:20.532011491Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:20.466583162Z", + "publish_time": "2025-12-23T09:32:20.466833072Z", + "first_worker_start": "2025-12-23T09:32:20.46728909Z", + "last_worker_end": "2025-12-23T09:32:20.530948Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:20.467257489Z", + "start_time": "2025-12-23T09:32:20.467314891Z", + 
"end_time": "2025-12-23T09:32:20.467387494Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:20.467433Z", + "start_time": "2025-12-23T09:32:20.467578Z", + "end_time": "2025-12-23T09:32:20.530948Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:20.467194386Z", + "start_time": "2025-12-23T09:32:20.46728909Z", + "end_time": "2025-12-23T09:32:20.467393794Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:20.467236388Z", + "start_time": "2025-12-23T09:32:20.46729589Z", + "end_time": "2025-12-23T09:32:20.467356393Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3856, + 
"slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/000b8624a0e3cb0e413b94d6a0290883dda9d9d1.json b/data/output/000b8624a0e3cb0e413b94d6a0290883dda9d9d1.json new file mode 100644 index 0000000..decbbbd --- /dev/null +++ b/data/output/000b8624a0e3cb0e413b94d6a0290883dda9d9d1.json @@ -0,0 +1,532 @@ +{ + "file_name": "000b8624a0e3cb0e413b94d6a0290883dda9d9d1.txt", + "total_words": 1647, + "top_n_words": [ + { + "word": "the", + "count": 139 + }, + { + "word": "of", + "count": 75 + }, + { + "word": "in", + "count": 52 + }, + { + "word": "a", + "count": 44 + }, + { + "word": "is", + "count": 29 + }, + { + "word": "to", + "count": 24 + }, + { + "word": "and", + "count": 23 + }, + { + "word": "by", + "count": 16 + }, + { + "word": "for", + "count": 16 + }, + { + "word": "with", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "He now lives in Canada.", + "length": 23 + }, + { + "text": "and things only got worse.", + "length": 26 + }, + { + "text": "We've had our revolution...", + "length": 27 + }, + { + "text": "A worker inside Vakil Mosque, Shiraz.", + "length": 37 + }, + { + "text": "Pictured right is a detail of Persepolis.", + "length": 41 + }, + { + "text": "' Women in the hills above Tehran at dusk.", + "length": 42 + }, + { + "text": "A group of friends in the hills above Tehran.", + "length": 45 + }, + { + "text": "The Mausoleum of Ayatollah Khomeini in Tehran.", + "length": 46 + }, + { + "text": "In Tehran, a collection of modern art valued at $2.", + "length": 51 + }, + { + "text": "5 billion is held by the Museum of Contemporary Art.", + "length": 52 + }, + { + "text": "From there it is transported by vehicle to the cities.", + "length": 54 + }, + { + "text": "Palangan Village, in the mountains near the Iraq border.", + "length": 56 + }, + { + "text": "Work on the unfinished building has dragged over 23 years.", + "length": 58 + }, + { + "text": "Detail of Persepolis, the seat of the Ancient Persian empire.", + 
"length": 61 + }, + { + "text": "A man in southern Tehran, the working class region of the city.", + "length": 63 + }, + { + "text": "Both soldiers were forced to leave the metro at the next station.", + "length": 65 + }, + { + "text": "In Tehran a can of beer on the black market fetches around $10 USD.", + "length": 67 + }, + { + "text": "Two soldiers being attacked inside the Tehran metro after an argument.", + "length": 70 + }, + { + "text": "' A mural painted on the wall of the former American embassy in Tehran.", + "length": 71 + }, + { + "text": "'For ordinary Iranians though, the government is a constant embarrassment.", + "length": 74 + }, + { + "text": "Under Khomeini Iranians were actively encouraged to produce large families.", + "length": 75 + }, + { + "text": "Public transport: Two young twins wear matching shirts on the Tehran Metro .", + "length": 76 + }, + { + "text": "Two shepherds lead Palangan's flock of communally-owned sheep out to pasture.", + "length": 77 + }, + { + "text": "Chapple said woman are arrested if their dress is considered to be 'immodest'.", + "length": 78 + }, + { + "text": "A commemorative plate of the former Shah of Iran in an antique store in Shiraz.", + "length": 79 + }, + { + "text": "The Book of Kings ends with the Arab invasion, depicted as a disaster for Persia.", + "length": 81 + }, + { + "text": "The mosque now serves as a tourist attraction but sees only a trickle of visitors.", + "length": 82 + }, + { + "text": "If a woman's dress is considered \"immodest\" she is arrested and taken into custody.", + "length": 83 + }, + { + "text": "Outside metro stations female police can be seen regularly checking the passers by.", + "length": 83 + }, + { + "text": "With growing economic chaos in the country, its completion is still nowhere in sight.", + "length": 85 + }, + { + "text": "A young worker walks through the light of a stained glass window in the Tehran Bazaar.", + "length": 86 + }, + { + "text": "Although tourism is on 
the increase, western tourists still make up only 10 per cent of the total.", + "length": 98 + }, + { + "text": "Bright lights, developing city: View of central Tehran from inside a minaret in Sepahsalar Mosque .", + "length": 99 + }, + { + "text": "Up in the hills: A shepherd leads his flock out to pasture in the mountains on the Iran/Iraq border .", + "length": 101 + }, + { + "text": "conquest of Persia led to a an Islamification of Iran but Farsi, the Iranian language, has remained alive.", + "length": 106 + }, + { + "text": "The 'Book of Kings' has been credited with helping preserve the Farsi language - one of the world's oldest.", + "length": 107 + }, + { + "text": "Said one Tehrani 'we're not naive like the Arabs to think a violent uprising will magically fix everything.", + "length": 107 + }, + { + "text": "The soldier was punched in the head at least four times by an angry crowd of mostly well-dressed young men.", + "length": 107 + }, + { + "text": "He said: 'I was amazed by the difference in western perceptions of the country and what I saw on the ground.", + "length": 108 + }, + { + "text": "'I found most Iranians -- particularly the younger generation -- to be very aware of the world around them...", + "length": 109 + }, + { + "text": "Life is becoming drastically difficult for ordinary Iranians but many feel powerless to change the situation.", + "length": 109 + }, + { + "text": "Azadi (Freedom) Tower, the gateway to Tehran designed in 1966 by a then 24 year old Hossein Amanat, pictured left.", + "length": 114 + }, + { + "text": "The anger at western intervention stoked strong initial support for the virulently anti-western Ayatollah Khomeini.", + "length": 115 + }, + { + "text": "Palangan, illustrative of many of the country's rural settlements, has benefited handsomely from government support.", + "length": 116 + }, + { + "text": "After the Islamic Revolution, hardline clerics called for the destruction of the site, but official unease prevailed.", + 
"length": 117 + }, + { + "text": "A Kurdish man settles in for a night of guarding some roadworking machinery in the mountains near the Iran/Iraq border.", + "length": 119 + }, + { + "text": "At the Sa'adabad Palace complex in northern Tehran, Islamic revolutionaries sawed a statue of the deposed Shah in half.", + "length": 119 + }, + { + "text": "Chapple said: 'I found most Iranians - particularly the younger generation - to be very aware of the world around them...", + "length": 121 + }, + { + "text": "Rural Basij were used as a part of the crackdown in 2009 which resulted in the deaths of seven anti-government protestors.", + "length": 122 + }, + { + "text": "Wearing head coverings is mandatory for women and female police can be seen regularly checking commuters' dress in the city.", + "length": 124 + }, + { + "text": "The border is rife with smugglers who carry alcohol from Iraq (where alcohol is legal) into the villages on the Iranian side.", + "length": 125 + }, + { + "text": "with a burning desire for the freedoms they feel they are being denied by an out of touch, ultra-conservative religious elite.", + "length": 126 + }, + { + "text": "with a burning desire for the freedoms they feel they are being denied by an out of touch, ultra-conservative religious elite.", + "length": 126 + }, + { + "text": "' In one striking image, the tiny village of Palangan in the mountains near the Iraq border can be seen lit up among the hills.", + "length": 127 + }, + { + "text": "In the past 14 months, tightened sanctions have nearly halved the value of Iran's currency and fueled soaring inflation (source).", + "length": 129 + }, + { + "text": "Many (every single one I met) young Iranians feel deeply embarrassed by their government, and the way the nation is perceived abroad.", + "length": 133 + }, + { + "text": "Today schoolchildren are taken on group visits past the boots and into the palace to see the decadence of the former Shah's living quarters.", + "length": 140 + }, + 
{ + "text": "The 11th century poet Ferdowsi, described as 'Iran's Homer', wrote an epic in Farsi which was carefully crafted with minimal Arabic influence.", + "length": 142 + }, + { + "text": "It is perceived as one of the most introverted countries in the world with a policy of eradicating any outside influence from foreign nations.", + "length": 142 + }, + { + "text": "By 2009 nearly 70 per cent of all Iranians were under 30, but according to some reports, the country is the least religious in the Middle East.", + "length": 143 + }, + { + "text": "As a practicing Bahai'i Hossein was forced to flee Iran after the Islamist government labeled followers of the religion 'unprotected infidels'.", + "length": 143 + }, + { + "text": "In 2010 a senior cleric in Tehran blamed the frequency of earthquakes in Iran on women who 'lead young men astray' with their revealing clothing.", + "length": 145 + }, + { + "text": "Instead of the \"armies for Islam\" Khomeini had called for, the youthful population is now seen as the biggest threat to the deeply unpopular regime.", + "length": 148 + }, + { + "text": "One tourist guide said westerners are scared away by the bloodcurdling rhetoric of a government which is completely out of touch with ordinary Iranians.", + "length": 152 + }, + { + "text": "Other images capture groups of young friends in the hills above the country's capital Tehran who he said were frustrated with the dated regime in the country.", + "length": 158 + }, + { + "text": "That Mosaddeqh had been a democratically-elected leader, with wide popular support fueled resentment at the Shah, who many saw as a brutal puppet for the west.", + "length": 159 + }, + { + "text": "Zac Clayton, a British cyclist who will finish a round-the-world cycle on March 23 described Iran as having the kindest people of any country he cycled through.", + "length": 160 + }, + { + "text": "' Chapple, from New Zealand, has visited the Islamic Republic of Iran three times between December 
2011 and January 2013 to accumulate his series of photographs.", + "length": 161 + }, + { + "text": "'In the time I spent there I never received anything but goodwill and decency, which stands in clear contrast to my experience in other middle eastern countries.", + "length": 161 + }, + { + "text": "The government's spending in some rural regions has bought them a network of loyal followers who can be scrambled at any time to crush trouble in the urban centers.", + "length": 164 + }, + { + "text": "With its tiny villages nestled into the side of mountains and picturesque farm land, which is rarely seen by outsiders, the country is as enchanting as it is mysterious.", + "length": 169 + }, + { + "text": "But a photographer's stunning collection of images from his journey through the Republic of Iran offers a rare insight into what life in the Islamic state is really like.", + "length": 170 + }, + { + "text": "\"Operation Ajax\" was actioned after Mosaddegh nationalized the petroleum industry of Iran, thus shutting out British dominance of an industry they had controlled since 1913.", + "length": 173 + }, + { + "text": "But photographer Amos Chapple said the real surprise of Persia was not its untouched and beautiful countryside, but how different it is from 'western perceptions of the country'.", + "length": 178 + }, + { + "text": "And he claims while the government continues its anti-western campaign, he found a growing discontent among the country's youth who were embarrassed by the actions of its leaders.", + "length": 179 + }, + { + "text": "Many rural settlements in the country are propped up by government funding with villagers often being paid members of the Basij - whose remit includes prevention of 'westoxification'.", + "length": 183 + }, + { + "text": "The Shah was given an Authoritarian hold on power thanks to an MI6 and CIA-backed coup in 1953 which deposed Prime Minister Mohammad Mosaddegh and cost the lives of several hundreds of Iranian citizens.", + 
"length": 202 + }, + { + "text": "Their role is to help preserve the Islamic way of life, such as the strict rules on female clothing and the interaction of men and women, which became immersed in Iranian law in the 1979 Islamic revolution.", + "length": 206 + }, + { + "text": "Concealing clothing in the Islamic Republic, including head coverings, is mandatory for women, but the exact definition of 'modest' is flexible, leading to a tug of war between young females and the authorities each spring.", + "length": 223 + }, + { + "text": "Murals such as this are at odds with statistics showing that, despite American sanctions, and the American-led coup against a elected and popular prime minister, more Iranians feel positively about America than do Turks or Indians.", + "length": 231 + }, + { + "text": "In a little-publicized exhibition in 2011 the works, including pieces by Warhol (pictured), Pollock, Munch, Hockney and Rothko were put on display for the first time since 1979 when the owner of the art, Queen Farah Pahlavi was forced to flee Iran with her husband, the late Shah of Iran.", + "length": 288 + }, + { + "text": "'I think because access for journalists is so difficult, people have a skewed image of what Iran is - the regime actually want to portray the country as a cauldron of anti-western sentiment so they syndicate news footage of chanting nutcases which is happily picked up by overseas networks.", + "length": 290 + }, + { + "text": "Many villagers are employed in a nearby fish farm, or are paid members of the Basij, whose remit includes prevention of 'westoxification', and the preservation of everything the 1979 Islamic revolution and its leader the Ayatollah Khomeini stood for, including strict rules on female clothing and male/female interaction.", + "length": 321 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5331194847822189 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:20.967238985Z", + 
"first_section_created": "2025-12-23T09:32:20.9676047Z", + "last_section_published": "2025-12-23T09:32:20.968168022Z", + "all_results_received": "2025-12-23T09:32:21.070533237Z", + "output_generated": "2025-12-23T09:32:21.070721244Z", + "total_processing_time_ms": 103, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 102, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:20.9676047Z", + "publish_time": "2025-12-23T09:32:20.967914112Z", + "first_worker_start": "2025-12-23T09:32:20.96836653Z", + "last_worker_end": "2025-12-23T09:32:21.061918Z", + "total_journey_time_ms": 94, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:20.968290127Z", + "start_time": "2025-12-23T09:32:20.96836653Z", + "end_time": "2025-12-23T09:32:20.968449234Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:20.96854Z", + "start_time": "2025-12-23T09:32:20.968685Z", + "end_time": "2025-12-23T09:32:21.061918Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 93 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:20.968305928Z", + "start_time": "2025-12-23T09:32:20.96837403Z", + "end_time": "2025-12-23T09:32:20.968526937Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:20.968308428Z", + "start_time": "2025-12-23T09:32:20.968386331Z", + "end_time": "2025-12-23T09:32:20.968460034Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:20.967990015Z", + "publish_time": "2025-12-23T09:32:20.968168022Z", + "first_worker_start": "2025-12-23T09:32:20.968504736Z", + "last_worker_end": "2025-12-23T09:32:21.069704Z", + "total_journey_time_ms": 101, + "worker_timings": { + 
"sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:20.968458734Z", + "start_time": "2025-12-23T09:32:20.968504736Z", + "end_time": "2025-12-23T09:32:20.968586939Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:20.968726Z", + "start_time": "2025-12-23T09:32:20.968841Z", + "end_time": "2025-12-23T09:32:21.069704Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 100 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:20.968506136Z", + "start_time": "2025-12-23T09:32:20.968589539Z", + "end_time": "2025-12-23T09:32:20.968743045Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:20.968397631Z", + "start_time": "2025-12-23T09:32:20.968514536Z", + "end_time": "2025-12-23T09:32:20.968586739Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 193, + "min_processing_ms": 93, + "max_processing_ms": 100, + "avg_processing_ms": 96, + "median_processing_ms": 100, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4907, + "slowest_section_id": 1, + "slowest_section_time_ms": 101 + } +} diff --git a/data/output/000beb8706bc2ad4c0c3040faf0ce54caa315454.json b/data/output/000beb8706bc2ad4c0c3040faf0ce54caa315454.json new file mode 100644 index 0000000..2584463 --- /dev/null +++ b/data/output/000beb8706bc2ad4c0c3040faf0ce54caa315454.json @@ -0,0 +1,238 @@ +{ + "file_name": "000beb8706bc2ad4c0c3040faf0ce54caa315454.txt", + "total_words": 275, + "top_n_words": [ + { + "word": "and", + "count": 11 + }, + { + "word": "the", + "count": 10 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "his", + "count": 8 + }, + { + "word": "in", + "count": 7 + }, + { + "word": "irvine", + "count": 7 + }, + { + "word": "sir", + "count": 7 + }, + { + "word": "he", + "count": 6 + }, + { + "word": "was", + "count": 6 + }, + { + "word": "by", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "08:40 EST, 31 December 2012 .", + "length": 29 + }, + { + "text": "18:08 EST, 30 December 2012 .", + "length": 29 + }, + { + "text": "He was awarded an OBE in 1980.", + "length": 30 + }, + { + "text": "Lucy Osborne and Rosie Taylor .", + "length": 31 + }, + { + "text": "They had two children Matthew and Suzanne and two grandchildren.", + "length": 64 + }, + { + "text": "He was a much loved husband of Lynda and father of Suzanne and Matthew.", + "length": 71 + }, + { + "text": "’ ‘He’ll be sadly missed by his brothers and by all all his family and friends.", + "length": 85 + }, + { + "text": "The former MP accused of saying Liverpool fans caused the Hillsborough tragedy has died aged 83.", + "length": 96 + }, + { + "text": "Sir Irvine Patnick represented 
Sheffield Hallam – now Nick Clegg’s constituency – from 1987 to 1997.", + "length": 106 + }, + { + "text": "Rest in peace: Sir Irvine Patnick, pictured receiving his knighthood at Buckingham Palace, has died today aged 83 .", + "length": 115 + }, + { + "text": "A statement from his family said: ‘Sir Irvine Patnick OBE, died peacefully on December 30, aged 83, in Sheffield.", + "length": 115 + }, + { + "text": "Sir Irvine, who chose to go by his middle name instead of his first name Cyril, lived in Sheffield with his wife of 52 years Lynda.", + "length": 131 + }, + { + "text": "’ Justice for the 96: Sir Irvine was named as the source for a newspaper story accusing Liverpool fans of causing the Hillsborough tragedy .", + "length": 142 + }, + { + "text": "Sir Irvine apologised for his comments, saying he was ‘deeply and sincerely sorry’ and that he had been given ‘wholly inaccurate’ information by police officers.", + "length": 169 + }, + { + "text": "Sir Irvine was vice-president of Sheffield’s Kingfield Synagogue, life president of Sheffield Jewish Representative Council, and a former national vice-chairman of the Maccabi sports and youth organisation.", + "length": 208 + }, + { + "text": "He was criticised in the Hillsborough Independent Panel’s report in September, which named him as The Sun newspaper’s source for its ‘The Truth’ story, smearing Liverpool fans after the disaster in which 96 died.", + "length": 220 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6902032494544983 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:21.46893465Z", + "first_section_created": "2025-12-23T09:32:21.469256863Z", + "last_section_published": "2025-12-23T09:32:21.46943527Z", + "all_results_received": "2025-12-23T09:32:21.53636166Z", + "output_generated": "2025-12-23T09:32:21.536501766Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 
0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:21.469256863Z", + "publish_time": "2025-12-23T09:32:21.46943527Z", + "first_worker_start": "2025-12-23T09:32:21.469968291Z", + "last_worker_end": "2025-12-23T09:32:21.53554Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:21.469944991Z", + "start_time": "2025-12-23T09:32:21.470010293Z", + "end_time": "2025-12-23T09:32:21.470046995Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:21.470193Z", + "start_time": "2025-12-23T09:32:21.470336Z", + "end_time": "2025-12-23T09:32:21.53554Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:21.46992409Z", + "start_time": "2025-12-23T09:32:21.469984292Z", + "end_time": "2025-12-23T09:32:21.470034894Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:21.469901989Z", + "start_time": "2025-12-23T09:32:21.469968291Z", + "end_time": "2025-12-23T09:32:21.470010493Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1687, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/000bee334a505220bfcc4ec154d1f7810f9928ca.json b/data/output/000bee334a505220bfcc4ec154d1f7810f9928ca.json new file mode 100644 index 0000000..db806e6 --- /dev/null +++ b/data/output/000bee334a505220bfcc4ec154d1f7810f9928ca.json @@ -0,0 +1,266 @@ +{ + "file_name": "000bee334a505220bfcc4ec154d1f7810f9928ca.txt", + "total_words": 625, + "top_n_words": [ + { + "word": "the", + "count": 35 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "can", + "count": 12 + }, + { + "word": "it", + "count": 11 + }, + { + "word": "as", + "count": 10 + }, + { + "word": "that", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "Time can be crucial when diagnosing illness.", + "length": 44 + }, + { + "text": "The procedure can cause discomfort and pain.", + "length": 44 + }, + { + "text": "Furthermore, it often has to be repeated, as the sampling is not always accurate.", + "length": 81 + }, + { + "text": "In small organisms, including zebrafish larvae, Scape can see through the entire organism.", + "length": 90 + }, + { + "text": "Jessell, co-director of the Zuckerman Institute and Claire Tow Professor of Motor Neuron Disorders.", + "length": 99 + }, + { + "text": "'It wasn't until we built it that we realized it was a light-sheet microscope,' said 
Professor Hillman.", + "length": 103 + }, + { + "text": "'This combination makes Scape both fast and very simple to use, as well as surprisingly inexpensive,' she said.", + "length": 111 + }, + { + "text": "Professor Hillman has already used the system to observe firing in 3D neurons trees in layers of the mouse brain.", + "length": 113 + }, + { + "text": "Scientists have developed a new microscope that can image living things in 3D, ten to 100 times faster than existing technologies.", + "length": 130 + }, + { + "text": "Dubbed 'Scape', the technology can also help unlock the secrets of brain activity by monitoring neurons as they fire in real-time.", + "length": 130 + }, + { + "text": "Scientists have developed a new microscope that can image living things in 3D, ten to 100 times faster than existing technologies.", + "length": 130 + }, + { + "text": "'This methodological advance offers the potential to unlock the secrets of brain activity in ways barely imaginable a few years ago.", + "length": 132 + }, + { + "text": "The endomicroscope was developed in France and is so powerful it can see individual cells and the blood vessels that run between them.", + "length": 134 + }, + { + "text": "'We think it will be transformative in bringing the ability to capture high-speed 3D cellular activity to a wide range of living samples.", + "length": 137 + }, + { + "text": "' Next-generation versions of Scape are in development that will deliver even better speed, resolution, sensitivity, and penetration depth.", + "length": 139 + }, + { + "text": "The system could be used to capture remarkable details, such as single cells in the zebrafish heart while it is beating, the researchers say.", + "length": 141 + }, + { + "text": "By tracking these tiny, unrestrained creatures in 3D at high speeds, the microscope can capture both cellular structure and function and behaviour.", + "length": 147 + }, + { + "text": "' Imaging techniques that can capture these dizzying dynamic 
processes, such as neurons firing in the brain, have lagged behind other breakthroughs.", + "length": 148 + }, + { + "text": "'It took us a while to realize how versatile the imaging geometry was, how simple and inexpensive the layout was—and just how many problems we had overcome.", + "length": 158 + }, + { + "text": "'With Scape, we can now image complex, living things, such as neurons firing in the rodent brain…this has not been possible until now,' said Professor Hillman.", + "length": 161 + }, + { + "text": "And with conditions such as cancer, the current method of testing samples of tissue for disease - a biopsy - can be a slow process, with an anxious wait for results.", + "length": 165 + }, + { + "text": "'Hillman's sophistication in optical physics has led her to develop a new imaging technique that permits large-scale detection of neuronal firing in three-dimensional brain tissues.", + "length": 181 + }, + { + "text": "'Deciphering the functions of brain and mind demands improved methods for visualizing, monitoring, and manipulating the activity of neural circuits in natural settings,' says Thomas M.", + "length": 184 + }, + { + "text": "It is already being used for pancreatic cancer, a disease with a poor prognosis as it's often detected late, and scientists are now looking at using it for colon, bladder, oesophageal and lung cancer.", + "length": 200 + }, + { + "text": "Now, scientists have developed the world's smallest microscope - the size of a pin - which is inserted into the body, allowing doctors to 'see' cancer and make an instant, precise diagnosis, saving the patient from the need for a biopsy.", + "length": 237 + }, + { + "text": "While conventional light-sheet microscopes use two awkwardly positioned lenses, Professor Hillman realised that she could use a single lens, and then that she could sweep the light sheet to generate 3D images without moving the objective or the sample.", + "length": 252 + }, + { + "text": "Dubbed 'Scape', the 
technology can also help unlock the secrets of brain activity by monitoring neurons as they fire in real-time (pictured) The microscope was developed by Elizabeth Hillman, associate professor of biomedical engineering at Columbia University Medical Center (CUMC), .", + "length": 285 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.44456467032432556 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:21.969136382Z", + "first_section_created": "2025-12-23T09:32:21.969499296Z", + "last_section_published": "2025-12-23T09:32:21.969725005Z", + "all_results_received": "2025-12-23T09:32:22.032393424Z", + "output_generated": "2025-12-23T09:32:22.032565831Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:21.969499296Z", + "publish_time": "2025-12-23T09:32:21.969725005Z", + "first_worker_start": "2025-12-23T09:32:21.970254227Z", + "last_worker_end": "2025-12-23T09:32:22.031453Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:21.970208925Z", + "start_time": "2025-12-23T09:32:21.970274527Z", + "end_time": "2025-12-23T09:32:21.970381932Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:21.970442Z", + "start_time": "2025-12-23T09:32:21.970563Z", + "end_time": "2025-12-23T09:32:22.031453Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:21.970187124Z", + "start_time": "2025-12-23T09:32:21.970254227Z", + "end_time": "2025-12-23T09:32:21.970359431Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": 
"2025-12-23T09:32:21.970213225Z", + "start_time": "2025-12-23T09:32:21.970305529Z", + "end_time": "2025-12-23T09:32:21.970431934Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3903, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/000c0265cb9c98336a0d67979fa91038a7cab075.json b/data/output/000c0265cb9c98336a0d67979fa91038a7cab075.json new file mode 100644 index 0000000..5a38c72 --- /dev/null +++ b/data/output/000c0265cb9c98336a0d67979fa91038a7cab075.json @@ -0,0 +1,258 @@ +{ + "file_name": "000c0265cb9c98336a0d67979fa91038a7cab075.txt", + "total_words": 518, + "top_n_words": [ + { + "word": "the", + "count": 29 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "his", + "count": 10 + }, + { + "word": "in", + 
"count": 9 + }, + { + "word": "is", + "count": 9 + }, + { + "word": "to", + "count": 9 + }, + { + "word": "on", + "count": 7 + }, + { + "word": "we", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "' she wrote.", + "length": 12 + }, + { + "text": "What a laugh!", + "length": 13 + }, + { + "text": "'Every toy is new!", + "length": 18 + }, + { + "text": "'Every toy is NEW!", + "length": 18 + }, + { + "text": "This is such a happy day!!", + "length": 26 + }, + { + "text": "'His brain is definitely receiving sound.", + "length": 41 + }, + { + "text": "'As long as he is happy and loving it, we are happy!", + "length": 52 + }, + { + "text": "'His face lighting up to the sounds is SO incredible!", + "length": 53 + }, + { + "text": "' And it seems as though life is full of laughs for Ryan these days.", + "length": 68 + }, + { + "text": "'Being a micropreemie isn't easy, especially when you end up staying in the NICU 7 months.", + "length": 90 + }, + { + "text": "Ryan Aprea couldn't stop giggling when he heard his mother Jennifer's voice for the first time .", + "length": 96 + }, + { + "text": "'These are VERY low levels of sound input because we don't want to overwhelm him and cause him to reject it.", + "length": 108 + }, + { + "text": "If this develops into hearing comprehension and oral language down the road, that will be icing on the cake!", + "length": 108 + }, + { + "text": "' Hours after posting the video, Jennifer shared this image of her son enjoying the sounds of his toys at home .", + "length": 112 + }, + { + "text": "Feeling better: The youngster spent seven months in the neonatal intensive care unit after being born prematurely .", + "length": 115 + }, + { + "text": "The audiologist programmed the device at three different levels and we will gradually increase them throughout the week.", + "length": 120 + }, + { + "text": "Making a memory: The family captured the heartwarming moment on video and quickly shared the clip with their followers on 
Facebook .", + "length": 132 + }, + { + "text": "'We're heading home and it's all good news after the mapping and activation,' the mother-of-two, who founded cloth diaper brand Spray Pal, wrote.", + "length": 145 + }, + { + "text": "'If you've been following our journey for a while, you probably already know that [Ryan] was born at 25 weeks,' she wrote on the page in October.", + "length": 145 + }, + { + "text": "' Ryan, who was born in October 2012 when he was just 25 weeks old, spent seven months in an intensive care unit before being allowed to return home with his family.", + "length": 165 + }, + { + "text": "We filmed him each time we tested the new level, and he giggled like this when he heard my voice at every level which was the most amazing feeling because we couldn't have asked for a better response.", + "length": 200 + }, + { + "text": "The moment when a young boy with severe hearing difficulties heard his mother's voice for the very first time has been captured in a heartwarming video clip, which shows the youngster dissolving into a fit of giggles every time she speaks.", + "length": 239 + }, + { + "text": "Since then, Jennifer has continued to document every stage of his life, sharing every success and struggle with her followers on Facebook, from Ryan's first few weeks in the neonatal intensive care unit to his cochlear surgery in November.", + "length": 239 + }, + { + "text": "Just hours after posting the video of her son's infectious giggling fits, Jennifer shared some photos of the toddler playing with all of his toys at home, adding that the experience was totally new to him now that he is able to actually hear them.", + "length": 247 + }, + { + "text": "Entrepreneur Jennifer Colson Aprea, from Huntington Beach, California, posted the clip of her two-year-old son Ryan on her company's Facebook page on Monday, revealing that her son had successfully been fitted with a cochlear implant which is enabling him to hear sounds for the first time in his 
life.", + "length": 302 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.38165274262428284 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:22.470525834Z", + "first_section_created": "2025-12-23T09:32:22.470844647Z", + "last_section_published": "2025-12-23T09:32:22.471048055Z", + "all_results_received": "2025-12-23T09:32:22.532704334Z", + "output_generated": "2025-12-23T09:32:22.53286474Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:22.470844647Z", + "publish_time": "2025-12-23T09:32:22.471048055Z", + "first_worker_start": "2025-12-23T09:32:22.471556076Z", + "last_worker_end": "2025-12-23T09:32:22.531876Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:22.471577877Z", + "start_time": "2025-12-23T09:32:22.471635279Z", + "end_time": "2025-12-23T09:32:22.471692881Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:22.471955Z", + "start_time": "2025-12-23T09:32:22.472093Z", + "end_time": "2025-12-23T09:32:22.531876Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:22.471521574Z", + "start_time": "2025-12-23T09:32:22.471584977Z", + "end_time": "2025-12-23T09:32:22.471679981Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:22.471486073Z", + "start_time": "2025-12-23T09:32:22.471556076Z", + "end_time": "2025-12-23T09:32:22.471611678Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + 
"total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2885, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/000c5ee0134182945f21c67d4e8af59259cf77a2.json b/data/output/000c5ee0134182945f21c67d4e8af59259cf77a2.json new file mode 100644 index 0000000..2accb41 --- /dev/null +++ b/data/output/000c5ee0134182945f21c67d4e8af59259cf77a2.json @@ -0,0 +1,528 @@ +{ + "file_name": "000c5ee0134182945f21c67d4e8af59259cf77a2.txt", + "total_words": 1598, + "top_n_words": [ + { + "word": "the", + "count": 74 + }, + { + "word": "and", + "count": 41 + }, + { + "word": "he", + "count": 39 + }, + { + "word": "to", + "count": 37 + }, + { + "word": "a", + "count": 36 + }, + { + "word": "in", + "count": 29 + }, + { + "word": "of", + "count": 29 + }, + { + "word": "collins", + "count": 23 + }, + { + "word": "was", + "count": 20 + }, + { + "word": "that", + "count": 19 + } + ], + "sorted_sentences": [ + { + "text": "Our rock.", + "length": 9 + }, + { + "text": "Fun and 
teasing.", + "length": 16 + }, + { + "text": "1 Wednesday morning.", + "length": 20 + }, + { + "text": "'He wasn't that guy.", + "length": 20 + }, + { + "text": "Scroll down for videos .", + "length": 24 + }, + { + "text": "He's not that guy to me.", + "length": 24 + }, + { + "text": "'It must be buried in him.", + "length": 26 + }, + { + "text": "No one ever suspected a thing.", + "length": 30 + }, + { + "text": "He really was a father figure.", + "length": 30 + }, + { + "text": "That's how low he is right now.", + "length": 31 + }, + { + "text": "He was always very present with us.", + "length": 35 + }, + { + "text": "No one wants to leave him by himself.", + "length": 37 + }, + { + "text": "He was our glue that held us together.", + "length": 38 + }, + { + "text": "'They fear he could take his own life.", + "length": 38 + }, + { + "text": "He wasn't someone with secrets and angst.", + "length": 41 + }, + { + "text": "\"The family are mortified and embarrassed.", + "length": 42 + }, + { + "text": "'We are all blown away by the revelations.", + "length": 42 + }, + { + "text": "\"The family feels absolutely betrayed by him.", + "length": 45 + }, + { + "text": "'We all had dinner together a few months ago.", + "length": 45 + }, + { + "text": "currently being investigated for child molestation.", + "length": 51 + }, + { + "text": "I wish him the best of luck to make it through this.", + "length": 52 + }, + { + "text": "'He took care of me and all of us during those years.", + "length": 53 + }, + { + "text": "'There were never any signs Stephen had these issues.", + "length": 53 + }, + { + "text": "They had a reunion just last month and everyone loved it.", + "length": 57 + }, + { + "text": "The woman named Ilene called into Boston radio station Mix 104.", + "length": 63 + }, + { + "text": "Knowing Stephen the way I do, I find it hard to put this together.", + "length": 66 + }, + { + "text": "' Calls to Collins' representatives were not returned to MailOnline.", + 
"length": 68 + }, + { + "text": "He never did anything sneaky when I knew him and I saw him every day.", + "length": 69 + }, + { + "text": "All we did is laugh all day long at work and a lot of it was because of him.", + "length": 76 + }, + { + "text": "\"With everything that has come to light they fear there could be other victims.", + "length": 79 + }, + { + "text": "” Yesterday cast and crew of 7th Heaven were said to be reeling over the news.", + "length": 80 + }, + { + "text": "Taught us to read music a little bit and would make up songs for any occasion for us.", + "length": 85 + }, + { + "text": "If this is true, it's disturbing because there was no outside appearance of problems.", + "length": 85 + }, + { + "text": "I hope he goes to rehab and comes out the same role model rock star that we know him as.", + "length": 88 + }, + { + "text": "It turned out to be a false alarm sparked by a member of the media who heard a loud pop .", + "length": 89 + }, + { + "text": "They remain concerned however for Stephen because they now know he has deep seeded issues.", + "length": 90 + }, + { + "text": "'Everyone who has worked with him has said he's amazing and lovely and one of the nicest guys ever.", + "length": 99 + }, + { + "text": "Reruns of 7th Heaven are not being broadcast and he will no longer appear on the hit TV show Scandal.", + "length": 101 + }, + { + "text": "'I should have followed my gut then, and then again 14 years ago, and kicked your ass to the curb,' she added.", + "length": 110 + }, + { + "text": "He's not the man they all thought he was and that's incredibly hurtful and confusing,' said the family source.", + "length": 110 + }, + { + "text": "Grant has also come out to say Collins even had incestuous thoughts about their only child when she was pregnant .", + "length": 114 + }, + { + "text": "Too young: The dirty dalliance between  Felicity's Keri Russell and Reverend Camden from 7th Heaven ended badly .", + "length": 114 + }, + { + "text": "Some 
are even wondering if he admitted everything he's done on those tapes, or if there's more he's keeping secret.", + "length": 115 + }, + { + "text": "The family source, who knows several cast members and often visited the 7th Heaven set, said: 'No one had any idea.", + "length": 115 + }, + { + "text": "In an interview with MailOnline, a cast member, who asked not to be identified, said, 'I'm very confused right now.", + "length": 115 + }, + { + "text": "Now those closest to the 67-year-old are deeply concerned the star has hit rock bottom and fear he may harm himself.", + "length": 116 + }, + { + "text": "The actor allegedly made a written confession to his now ex-wife Faye Grant in 2012, which sparked the therapy session.", + "length": 119 + }, + { + "text": "LAPD officers rushed to Tarzana, California, after getting a 911 call about a shot fired inside Collins' house last night.", + "length": 122 + }, + { + "text": "An NYPD official confirmed to MailOnline that they had received a complaint and the Special Victims Squad is investigating.", + "length": 123 + }, + { + "text": "Ar stake is property worth $13million including over $5 million in real estate and Collins' $100,000 worth of vintage guitars.", + "length": 126 + }, + { + "text": "There's so much that he has already hidden from everyone he's worked and lived with, they don't know what could come out next.", + "length": 126 + }, + { + "text": "And the LAPD is also reviewing their own 2012 investigation into Collins, and will be collaborating with New York authorities.", + "length": 126 + }, + { + "text": "'It's like the neighbors of the serial killer with bodies buried in the basement who always say, 'He was always such a nice guy'.", + "length": 129 + }, + { + "text": "Collins' neighbor, former Playboy model and Baywatch star Donna D'Errico, tweeted just after 8pm PT that the actor shot himself .", + "length": 129 + }, + { + "text": "Before the storm: Collins flew from New York to Los Angeles on Monday, the 
day before the child molestation allegations surfaced .", + "length": 130 + }, + { + "text": "Family reunion: Last month Collins tweeted this photo of a cast reunion dinner attended by his former co-star Jessica Biel, center .", + "length": 132 + }, + { + "text": "' His estranged wife Faye Grant has now claimed that Collins even had incestuous thoughts about their only child when she was pregnant.", + "length": 135 + }, + { + "text": "Law enforcement in California received the therapy session recordings two years ago but closed their case after finding 'no verified victim'.", + "length": 141 + }, + { + "text": "The family of Stephen Collins fear the 7th Heaven star is on the brink of suicide over shocking allegations he molested several underage girls.", + "length": 143 + }, + { + "text": "False alarm: Police arrived at a house where Stephen Collins was believed to be staying in Tarzana, California, in response to the false report .", + "length": 145 + }, + { + "text": "'His family by no means condone what he's said to have done, but it doesn't stop them worrying about him,' a close family friend told MailOnline.", + "length": 145 + }, + { + "text": "She says just before giving birth, he told her how glad he was they were having a girl, who they named Kate, instead of a boy who he may have abused.", + "length": 149 + }, + { + "text": "The family source says cast and crew of 7th Heaven - the popular long-running family drama which ended in 2007 - are in 'shock' over the bombshell claims.", + "length": 154 + }, + { + "text": "Neighbor tweets: Collins' neighbor, former Baywatch star and Playboy model Donna D'Errico, tweeted just after 8pm pacific time that the actor shot himself .", + "length": 156 + }, + { + "text": "Sex tape: Collins' estranged wife Faye Grant secretly recorded the session with her therapist in which he admitted to sexually abusing three underage girls .", + "length": 157 + }, + { + "text": "She claims the actor used his celebrity status to prey on 
underage girls and 'engender the trust of the families of the children he molested' in a decade of abuse.", + "length": 163 + }, + { + "text": "In the recorded therapy session, Collins seemingly admits that he exposed himself to several young girls between the ages of 10 and 13 in both Los Angeles and New York.", + "length": 168 + }, + { + "text": "And in explosive court documents Grant reveals she was 'sickened' after her husband allegedly confessed to living a vile 'secret life' in which he abused several children.", + "length": 171 + }, + { + "text": "Shock: 7th Heaven cast members can't believe the claims against the TV dad, who played  Rev Eric Camden on the long-running family drama about parents raising seven children.", + "length": 175 + }, + { + "text": "The actor, who played a beloved pastor on the hit family friendly show, has been left distraught after his taped confessions made during a marriage therapy session were made public.", + "length": 181 + }, + { + "text": "The cast included  (left to right) are David Gallagher, Jessica Biel, Barry Watson, Beverly Mitchell, Mackenzie Rosman (seated with dog Happy ), Catherine Hicks and Stephen Collins .", + "length": 183 + }, + { + "text": "Oops: After it was determined that there was no shotgun fire or suicide last night, D'Errico left a statement taped to her door stating her side of what she did and asking for privacy .", + "length": 185 + }, + { + "text": "According to court papers obtained by MailOnline, Grant is attempting to indemnify herself and the divorce settlement from  settlement any civil action stemming from the pediphelia charges.", + "length": 190 + }, + { + "text": "Following the tapes released by TMZ on Tuesday, Collins life and career have been falling apart as he resigned from his position on the Screen Actors Guild board and was fired from the film Ted 2.", + "length": 196 + }, + { + "text": "Leering: Collins starred  in the 1996 Lifetime movie The Babysitter's Seduction in which he 
played a recently widowed dad who becomes obsessed with his childrens' babysitter, played by Keri Russell .", + "length": 200 + }, + { + "text": "Ironically Collins starred in the 1996 Lifetime movie The Babysitter's Seduction in which he played a recently widowed dad who becomes obsessed with his childrens' teenage babysitter, played by Keri Russell.", + "length": 207 + }, + { + "text": "Grant secretly recorded the session under the advisement of her lawyer, who told her it was legal in California to record conversations in order to gather evidence on a person who has committed a violent felony.", + "length": 211 + }, + { + "text": "But a source close to Collins told the gossip site that the allegations are 'absolutely untrue,' that he never received that email and Grant never brought these claims up during their contentious divorce proceedings.", + "length": 216 + }, + { + "text": "Devastated Grant also revealed Collins was treated for a sex addiction and was seeing a 'sexual dysfunction' therapist, but he refused to seek proper help or hospitalization for his 'predilection towards children', she said.", + "length": 224 + }, + { + "text": "On top of that, a Massachusetts woman who worked as a nanny in New York in the 1990s, in the same building as Collins, has come forward to reveal his strange daily visits in pajamas and the 'semi-pornographic novel' he was writing.", + "length": 231 + }, + { + "text": "Broken trust: In bombshell divorce documents obtained by MailOnline, Stephen Collins  estranged wife Faye Grant claims the actor used his celebrity status to prey on underage girls and ‘engender the trust of the families of the children he molested’ in a decade of abuse.", + "length": 276 + }, + { + "text": "'The comment you made just before I gave birth to our daughter when you said you hoped we didn't have a little boy, because \"you just didn't know if you could keep his little penis out of your mouth\" was indication enough that you were sick,' Grant wrote in 
an email obtained by TMZ.", + "length": 283 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7088530361652374 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:22.972171097Z", + "first_section_created": "2025-12-23T09:32:22.972567213Z", + "last_section_published": "2025-12-23T09:32:22.973092534Z", + "all_results_received": "2025-12-23T09:32:23.083314865Z", + "output_generated": "2025-12-23T09:32:23.083554074Z", + "total_processing_time_ms": 111, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 110, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:22.972567213Z", + "publish_time": "2025-12-23T09:32:22.972826824Z", + "first_worker_start": "2025-12-23T09:32:22.973381346Z", + "last_worker_end": "2025-12-23T09:32:23.082257Z", + "total_journey_time_ms": 109, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:22.97347605Z", + "start_time": "2025-12-23T09:32:22.973568454Z", + "end_time": "2025-12-23T09:32:22.973690558Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:22.973651Z", + "start_time": "2025-12-23T09:32:22.973799Z", + "end_time": "2025-12-23T09:32:23.082257Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 108 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:22.973310143Z", + "start_time": "2025-12-23T09:32:22.973381346Z", + "end_time": "2025-12-23T09:32:22.973520752Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:22.973376846Z", + "start_time": "2025-12-23T09:32:22.973459949Z", + "end_time": "2025-12-23T09:32:22.974501591Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 1 + } + } + }, + { + "section_id": 1, + "creation_time": 
"2025-12-23T09:32:22.972892826Z", + "publish_time": "2025-12-23T09:32:22.973092534Z", + "first_worker_start": "2025-12-23T09:32:22.973500251Z", + "last_worker_end": "2025-12-23T09:32:23.061713Z", + "total_journey_time_ms": 88, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:22.973504051Z", + "start_time": "2025-12-23T09:32:22.973577654Z", + "end_time": "2025-12-23T09:32:22.973674758Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:22.973665Z", + "start_time": "2025-12-23T09:32:22.973802Z", + "end_time": "2025-12-23T09:32:23.061713Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 87 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:22.973449349Z", + "start_time": "2025-12-23T09:32:22.973500251Z", + "end_time": "2025-12-23T09:32:22.973615155Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:22.973459149Z", + "start_time": "2025-12-23T09:32:22.973507451Z", + "end_time": "2025-12-23T09:32:22.973595555Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 195, + "min_processing_ms": 87, + "max_processing_ms": 108, + "avg_processing_ms": 97, + "median_processing_ms": 108, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 1, + "min_processing_ms": 0, + "max_processing_ms": 1, + "avg_processing_ms": 0, + "median_processing_ms": 1, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4558, + "slowest_section_id": 0, + "slowest_section_time_ms": 109 + } +} diff --git a/data/output/000c8035a65520d60c68b9874811d6c8a26b5065.json b/data/output/000c8035a65520d60c68b9874811d6c8a26b5065.json new file mode 100644 index 0000000..803d4b2 --- /dev/null +++ b/data/output/000c8035a65520d60c68b9874811d6c8a26b5065.json @@ -0,0 +1,254 @@ +{ + "file_name": "000c8035a65520d60c68b9874811d6c8a26b5065.txt", + "total_words": 641, + "top_n_words": [ + { + "word": "the", + "count": 29 + }, + { + "word": "and", + "count": 23 + }, + { + "word": "to", + "count": 22 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "s", + "count": 12 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "for", + "count": 10 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "is", + "count": 9 + }, + { + "word": "that", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "Unveiled: Mauricio Pochettino has made the move from Southampton to Tottenham .", + "length": 79 + }, + { + "text": "Outgoing: And Jay Rodriguez would have been another were it not for his injury .", + "length": 80 + }, + { + "text": "VIDEO Scroll down to see Mauricio Pochettino confirmed as new Tottenham manager .", + "length": 81 + }, + { + "text": "Battler: Davis fighting West Brom's Youssouf Mulumbu (left) and James Morrison for the ball .", + "length": 93 + }, + { + "text": "Belief: Steven Davis reckons that Southampton can continue to progress without Mauricio Pochettino .", + "length": 100 + }, + { + "text": "' Moving: Luke Shaw and Adam Lallana are two of Southampton's young stars that have been 
tipped for a move .", + "length": 108 + }, + { + "text": "'There is no reason why that should stop now and everyone will want to get going forward to achieve our goals.", + "length": 110 + }, + { + "text": "'The club has come a long way in the last five years and the blueprint has been evident under each and every manager.", + "length": 117 + }, + { + "text": "'I hope that is still the case and all the lads really enjoy working with each other and trying to push the club forward.", + "length": 121 + }, + { + "text": "'It's a blow for everybody to lose him, you can't really get away from that, and the players all loved working for him,' said Davis.", + "length": 132 + }, + { + "text": "'There's obviously rumours even before the manager left and that was well documented but that was based on us having a really good season.", + "length": 138 + }, + { + "text": "'I'm sure the owners will be doing their utmost now to get the best person possible in and ensure the we can keep the squad together,' he said.", + "length": 143 + }, + { + "text": "'He pushed us on from where we were at before he came in; everyone personally improved, results improved as well and things were looking bright for the future.", + "length": 159 + }, + { + "text": "'It's disappointing but at the same time I don't think it will affect the club in terms of progression because there is still a lot of growth left in the team.", + "length": 159 + }, + { + "text": "'Nobody has asked me just yet to get information on Uruguay but I was texting Adam Lallana, just talking about the games coming up and how things were going,\" he said.", + "length": 167 + }, + { + "text": "Southampton midfielder Steven Davis was sad to see Mauricio Pochettino leave for Tottenham, but does not believe it represents the end of the club's rise to prominence.", + "length": 168 + }, + { + "text": "Dream moves for the likes of Shaw and Lallana to Old Trafford and Anfield have been mooted for some time now, but Davis is not treating 
their departures as a done deals.", + "length": 169 + }, + { + "text": "Saints excelled under Pochettino last season, finishing in eighth place in the Barclays Premier League and drawing admiring glances across the board for their style of play.", + "length": 173 + }, + { + "text": "'If there is anything I can tell him, Ricky Lambert or Luke to help them then of course I will but I have no doubt they will be well briefed when it comes to their game with Uruguay.", + "length": 182 + }, + { + "text": "' Davis will lead his country out against England's World Cup Group D opponents Uruguay on Friday, and has vowed to pass any nuggets of information on to his friends in Roy Hodgson's squad.", + "length": 189 + }, + { + "text": "'You can't stop that and it's always difficult if a top four club comes in to keep a hold of them, but I always felt all the players bought into what the club is trying to achieve and everyone is happy there.", + "length": 208 + }, + { + "text": "Such success was always likely to attract interest from elsewhere and, like their former manager, the likes of Adam Lallana, Luke Shaw and Jay Rodriguez have all been linked with high-profile transfers this summer.", + "length": 214 + }, + { + "text": "Northern Ireland captain Davis, who found out about Pochettino's departure while on international duty in Uruguay, admits it is a tough time but expects the structure that exists at St Mary's to continue yielding benefits.", + "length": 222 + }, + { + "text": "Whether Southampton continue to grow and defy expectations is likely to depend in large part on how successful they are in rebuffing interest in their star assets from the likes of Manchester United, Liverpool and now Pochettino's Tottenham.", + "length": 241 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.3722761869430542 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:23.473925565Z", + "first_section_created": 
"2025-12-23T09:32:23.474229977Z", + "last_section_published": "2025-12-23T09:32:23.474461686Z", + "all_results_received": "2025-12-23T09:32:23.533123144Z", + "output_generated": "2025-12-23T09:32:23.533307152Z", + "total_processing_time_ms": 59, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:23.474229977Z", + "publish_time": "2025-12-23T09:32:23.474461686Z", + "first_worker_start": "2025-12-23T09:32:23.475101812Z", + "last_worker_end": "2025-12-23T09:32:23.53222Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:23.47505171Z", + "start_time": "2025-12-23T09:32:23.475134313Z", + "end_time": "2025-12-23T09:32:23.475201816Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:23.47526Z", + "start_time": "2025-12-23T09:32:23.475401Z", + "end_time": "2025-12-23T09:32:23.53222Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:23.47506141Z", + "start_time": "2025-12-23T09:32:23.475137814Z", + "end_time": "2025-12-23T09:32:23.475274919Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:23.475023509Z", + "start_time": "2025-12-23T09:32:23.475101812Z", + "end_time": "2025-12-23T09:32:23.475145314Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": 
"sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3574, + "slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/000c835555db62e319854d9f8912061cdca1893e.json b/data/output/000c835555db62e319854d9f8912061cdca1893e.json new file mode 100644 index 0000000..27fdf2e --- /dev/null +++ b/data/output/000c835555db62e319854d9f8912061cdca1893e.json @@ -0,0 +1,234 @@ +{ + "file_name": "000c835555db62e319854d9f8912061cdca1893e.txt", + "total_words": 416, + "top_n_words": [ + { + "word": "the", + "count": 30 + }, + { + "word": "cardinals", + "count": 14 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "from", + "count": 11 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "francis", + "count": 10 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "new", + "count": 9 + }, + { + "word": "that", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "Allen said.", + "length": 11 + }, + { + "text": "CNN's Daniel Burke and Christabelle Fombu contributed to this report.", + "length": 69 + }, + { + "text": "They are sometimes referred to as the princes of the Catholic Church.", + "length": 69 + }, + { + "text": "\" But for the second time since 
Francis' election, no Americans made the list.", + "length": 78 + }, + { + "text": "The new cardinals come from countries such as Ethiopia, New Zealand and Myanmar.", + "length": 80 + }, + { + "text": "Last year, Pope Francis appointed 19 new cardinals, including bishops from Haiti and Burkina Faso.", + "length": 98 + }, + { + "text": "That doesn't mean Francis is the first pontiff to appoint cardinals from the developing world, though.", + "length": 102 + }, + { + "text": "John XXIII, whom Francis canonized last year, appointed the first cardinals from Japan, the Philippines and Africa.", + "length": 115 + }, + { + "text": "Beginning in the 1920s, an increasing number of Latin American churchmen were named cardinals, and in the 1960s, St.", + "length": 116 + }, + { + "text": "\"This is a pope who very much wants to reach out to people on the margins, and you clearly see that in this set,\" Allen said.", + "length": 125 + }, + { + "text": "In addition to the 15 new cardinals Francis named on Sunday, five retired archbishops and bishops will also be honored as cardinals.", + "length": 132 + }, + { + "text": "\"On feast of three wise men from far away, the Pope's choices for cardinal say that every local church deserves a place at the big table.", + "length": 137 + }, + { + "text": "\"You're talking about cardinals from typically overlooked places, like Cape Verde, the Pacific island of Tonga, Panama, Thailand, Uruguay.", + "length": 138 + }, + { + "text": "New cardinals are always important because they set the tone in the church and also elect the next pope, CNN Senior Vatican Analyst John L.", + "length": 139 + }, + { + "text": "\"Francis' pattern is very clear: He wants to go to the geographical peripheries rather than places that are already top-heavy with cardinals,\" Allen said.", + "length": 154 + }, + { + "text": "\" In other words, Francis wants a more decentralized church and wants to hear reform ideas from small communities that sit far from Catholicism's 
power centers, Bellitto said.", + "length": 175 + }, + { + "text": "(CNN)For the second time during his papacy, Pope Francis has announced a new group of bishops and archbishops set to become cardinals -- and they come from all over the world.", + "length": 175 + }, + { + "text": "Christopher Bellitto, a professor of church history at Kean University in New Jersey, noted that Francis announced his new slate of cardinals on the Catholic Feast of the Epiphany, which commemorates the visit of the Magi to Jesus' birthplace in Bethlehem.", + "length": 256 + }, + { + "text": "Pope Francis said Sunday that he would hold a meeting of cardinals on February 14 \"during which I will name 15 new Cardinals who, coming from 13 countries from every continent, manifest the indissoluble links between the Church of Rome and the particular Churches present in the world,\" according to Vatican Radio.", + "length": 314 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5602134466171265 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:23.975272016Z", + "first_section_created": "2025-12-23T09:32:23.975600129Z", + "last_section_published": "2025-12-23T09:32:23.975830038Z", + "all_results_received": "2025-12-23T09:32:24.038239647Z", + "output_generated": "2025-12-23T09:32:24.038693065Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:23.975600129Z", + "publish_time": "2025-12-23T09:32:23.975830038Z", + "first_worker_start": "2025-12-23T09:32:23.976409262Z", + "last_worker_end": "2025-12-23T09:32:24.03721Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:23.976336059Z", + "start_time": "2025-12-23T09:32:23.976411562Z", + "end_time": "2025-12-23T09:32:23.976454363Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:23.976519Z", + "start_time": "2025-12-23T09:32:23.976651Z", + "end_time": "2025-12-23T09:32:24.03721Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:23.97637536Z", + "start_time": "2025-12-23T09:32:23.976441763Z", + "end_time": "2025-12-23T09:32:23.976516766Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:23.976336159Z", + "start_time": "2025-12-23T09:32:23.976409262Z", + "end_time": "2025-12-23T09:32:23.976456063Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2499, + "slowest_section_id": 0, + 
"slowest_section_time_ms": 61 + } +} diff --git a/data/output/000ca3fc9d877f8d4bb2ebd1d6858c69be571fd8.json b/data/output/000ca3fc9d877f8d4bb2ebd1d6858c69be571fd8.json new file mode 100644 index 0000000..16c32aa --- /dev/null +++ b/data/output/000ca3fc9d877f8d4bb2ebd1d6858c69be571fd8.json @@ -0,0 +1,412 @@ +{ + "file_name": "000ca3fc9d877f8d4bb2ebd1d6858c69be571fd8.txt", + "total_words": 830, + "top_n_words": [ + { + "word": "the", + "count": 61 + }, + { + "word": "to", + "count": 30 + }, + { + "word": "in", + "count": 20 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "and", + "count": 16 + }, + { + "word": "said", + "count": 15 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "s", + "count": 13 + }, + { + "word": "detainees", + "count": 11 + }, + { + "word": "for", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "said.", + "length": 5 + }, + { + "text": "A senior U.", + "length": 11 + }, + { + "text": "government \"on the table.", + "length": 25 + }, + { + "text": "They left Afghanistan after U.", + "length": 30 + }, + { + "text": "\"We don't see it as quid pro quo.", + "length": 33 + }, + { + "text": "Attorney General Eric Holder says the U.", + "length": 40 + }, + { + "text": "It has received nearly $900 million in U.", + "length": 41 + }, + { + "text": "East Turkestan is another name for Xinjiang.", + "length": 44 + }, + { + "text": "He said he decided to put an offer to the U.", + "length": 44 + }, + { + "text": "is \"extremely grateful to the government of Bermuda.", + "length": 52 + }, + { + "text": "China has said no returned Uyghurs would be tortured.", + "length": 53 + }, + { + "text": "Watch concerns about resettling 
the Uyghur detainees » .", + "length": 57 + }, + { + "text": "CNN's Brian Vitagliano and Don Lemon contributed to this report.", + "length": 64 + }, + { + "text": "The official said the average in such cases is $100,000 per person.", + "length": 67 + }, + { + "text": "Uyghurs are a Muslim minority from the Xinjiang province of far-west China.", + "length": 75 + }, + { + "text": "They left China because they did not agree with the government, she told CNN.", + "length": 77 + }, + { + "text": "They were among 17 Uyghur detainees at the facility set up to hold terror suspects.", + "length": 83 + }, + { + "text": "State Department considers a terrorist organization -- that operates in the Xinjiang region.", + "length": 92 + }, + { + "text": "bombings began in the area in October 2001 and were apprehended in Pakistan, the statement said.", + "length": 96 + }, + { + "text": "Manning said the 17 were picked up as a matter of circumstance and never had terrorist training.", + "length": 96 + }, + { + "text": "However, China alleges the men are part of the East Turkestan Islamic Movement -- a group the U.", + "length": 96 + }, + { + "text": "HAMILTON, Bermuda (CNN) -- Four Chinese nationals of Uyghur ethnicity who had been held at the U.", + "length": 97 + }, + { + "text": "The four were flown by private plane Wednesday night from Cuba to Bermuda and were accompanied by U.", + "length": 100 + }, + { + "text": "\" He said Bermuda, a British colony, told London of its intentions, but not until late in the process.", + "length": 102 + }, + { + "text": "The Chinese statement followed an offer by Palau, a Pacific island nation, to accept the Uyghur detainees.", + "length": 106 + }, + { + "text": "The issue is controversial because of China's opposition to the Uyghurs being sent to any country but China.", + "length": 108 + }, + { + "text": "military's Guantanamo Bay, Cuba, detention facility have been resettled in Bermuda, officials said Thursday.", + "length": 108 + }, + { + 
"text": "China on Thursday urged the United States to hand over all 17 of the Uyghurs instead of sending them elsewhere.", + "length": 111 + }, + { + "text": "aid since independence in 1994, according to congressional auditors, and depends on Washington for its defense.", + "length": 111 + }, + { + "text": "and Bermudan representatives as well as their attorneys, according to Susan Baker Manning, part of the men's legal team.", + "length": 120 + }, + { + "text": "A political backlash against bringing any of the detainees to the United States has increased the focus on sending them to other countries.", + "length": 139 + }, + { + "text": "\" \"Above all, this was a humanitarian act,\" Bermudan Premier Ewart Brown told CNN in an interview at his Cabinet office in Hamilton, Bermuda.", + "length": 141 + }, + { + "text": "President Obama has pledged to close the Guantanamo facility, raising questions of what will happen to the more than 200 remaining detainees.", + "length": 141 + }, + { + "text": "\" The four were twice cleared for release -- once by the Bush administration and again this year, according to a Justice Department statement.", + "length": 142 + }, + { + "text": "The United States will not send Uyghur detainees cleared for release back to China out of concern that they would be tortured by Chinese authorities.", + "length": 149 + }, + { + "text": "Britain must approve the transfer for it to be permanent, Brown said, adding that he believes the issue may raise tension between Bermuda and Britain.", + "length": 150 + }, + { + "text": "administration official told CNN the State Department is working on a final agreement with Palau to settle the matter of the 13 remaining Uyghur detainees.", + "length": 155 + }, + { + "text": "Palau said it will take in the ethnic Uyghur detainees for humanitarian reasons and because of the \"special relationship\" between Palau and the United States.", + "length": 158 + }, + { + "text": "Issues to be worked out include how to 
transfer the Uyghurs to Palau and how much money the United States would give the men for resettlement, the official said.", + "length": 161 + }, + { + "text": "Brown said he read an article on the issue of the Guantanamo Bay detainees' fates in The Washington Post while he was in Washington for a White House meeting in May.", + "length": 165 + }, + { + "text": "\"According to available information, these individuals did not travel to Afghanistan with the intent to take any hostile action against the United States,\" the statement said.", + "length": 175 + }, + { + "text": "Palau, with a population of about 20,000, is about 1,000 miles (1,600 kilometers) southeast of Manila in the Philippines and about 4,600 miles (7,400 kilometers) west of Hawaii.", + "length": 177 + }, + { + "text": "The Xinjiang region of 20 million people is largely populated by ethnic Uyghurs and other Muslim minorities who have traditionally opposed Beijing's rule and clamored for greater autonomy.", + "length": 188 + }, + { + "text": "The 17 Uyghurs had left China and made their way to Afghanistan, where they settled in a camp with other Uyghurs opposed to the Chinese government, the Justice Department said in its statement.", + "length": 193 + }, + { + "text": "In 2006, five other Uyghur detainees were transferred to Albania, according to the Justice Department, which said it has no reports they took part in any post-resettlement criminal behavior or terrorist activities.", + "length": 214 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6217135488986969 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:24.476581465Z", + "first_section_created": "2025-12-23T09:32:24.476868377Z", + "last_section_published": "2025-12-23T09:32:24.477322495Z", + "all_results_received": "2025-12-23T09:32:24.563238248Z", + "output_generated": "2025-12-23T09:32:24.563445757Z", + "total_processing_time_ms": 86, + "file_splitting_time_ms": 0, + 
"workers_processing_time_ms": 85, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:24.476868377Z", + "publish_time": "2025-12-23T09:32:24.477172789Z", + "first_worker_start": "2025-12-23T09:32:24.477651908Z", + "last_worker_end": "2025-12-23T09:32:24.562387Z", + "total_journey_time_ms": 85, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:24.477560505Z", + "start_time": "2025-12-23T09:32:24.477651908Z", + "end_time": "2025-12-23T09:32:24.477770713Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:24.477766Z", + "start_time": "2025-12-23T09:32:24.477905Z", + "end_time": "2025-12-23T09:32:24.562387Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 84 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:24.477649808Z", + "start_time": "2025-12-23T09:32:24.477717611Z", + "end_time": "2025-12-23T09:32:24.478058225Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:24.477576305Z", + "start_time": "2025-12-23T09:32:24.477674809Z", + "end_time": "2025-12-23T09:32:24.477721511Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:24.477231691Z", + "publish_time": "2025-12-23T09:32:24.477322495Z", + "first_worker_start": "2025-12-23T09:32:24.477718811Z", + "last_worker_end": "2025-12-23T09:32:24.506105Z", + "total_journey_time_ms": 28, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:24.477746412Z", + "start_time": "2025-12-23T09:32:24.477769813Z", + "end_time": "2025-12-23T09:32:24.477774013Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + 
"receive_time": "2025-12-23T09:32:24.477989Z", + "start_time": "2025-12-23T09:32:24.478127Z", + "end_time": "2025-12-23T09:32:24.506105Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 27 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:24.47769121Z", + "start_time": "2025-12-23T09:32:24.477718811Z", + "end_time": "2025-12-23T09:32:24.477725211Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:24.477704911Z", + "start_time": "2025-12-23T09:32:24.477731112Z", + "end_time": "2025-12-23T09:32:24.477732412Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 111, + "min_processing_ms": 27, + "max_processing_ms": 84, + "avg_processing_ms": 55, + "median_processing_ms": 84, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2533, + "slowest_section_id": 0, + "slowest_section_time_ms": 85 + } +} diff --git a/data/output/000cd1ee0098c4d510a03ddc97d11764448ebac2.json 
b/data/output/000cd1ee0098c4d510a03ddc97d11764448ebac2.json new file mode 100644 index 0000000..c7b6df2 --- /dev/null +++ b/data/output/000cd1ee0098c4d510a03ddc97d11764448ebac2.json @@ -0,0 +1,266 @@ +{ + "file_name": "000cd1ee0098c4d510a03ddc97d11764448ebac2.txt", + "total_words": 542, + "top_n_words": [ + { + "word": "the", + "count": 47 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "to", + "count": 14 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "leopard", + "count": 10 + }, + { + "word": "are", + "count": 9 + }, + { + "word": "said", + "count": 9 + }, + { + "word": "district", + "count": 7 + }, + { + "word": "has", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "Most leopards live on wild prey.", + "length": 32 + }, + { + "text": "\"It could be the same leopard,\" he said.", + "length": 40 + }, + { + "text": "Normally, it is illegal to kill wild animals.", + "length": 45 + }, + { + "text": "\"No adult male has been killed,\" Kharel said.", + "length": 45 + }, + { + "text": "The district administration has announced a Rs.", + "length": 47 + }, + { + "text": "\"There is no alternative but to kill the leopard.", + "length": 49 + }, + { + "text": "25,000 (about $300) reward to anyone who captures or kills the leopard.", + "length": 71 + }, + { + "text": "If not, there are at most two of the man-eating creatures around, he believes.", + "length": 78 + }, + { + "text": "Of the 15 victims in Nepal so far, two-thirds are children below the age of 10.", + "length": 79 + }, + { + "text": "After killing its victim, the leopard takes the body away into the forest to eat.", + "length": 81 + }, + { + "text": "All the victims are from villages bordering the dense forests in the district, he said.", + "length": 87 + }, + { + "text": "The police chief suspects that a single man-eating leopard is responsible for the deaths.", + "length": 89 + }, + { + "text": "Controlling this particular leopard has been 
a challenge for the wildlife officials in Kathmandu.", + "length": 97 + }, + { + "text": "\" The chief district administrator has granted permission for this particular leopard to be killed.", + "length": 99 + }, + { + "text": "Leopards are common in the low mountain areas, as compared to the high Himalayas, across the country.", + "length": 101 + }, + { + "text": "More human victims could also be expected if there were more than one or two man-eaters around, he said.", + "length": 104 + }, + { + "text": "\"We are sending a veterinary doctor to the district to understand the situation,\" Dhakal, the ecologist, said.", + "length": 110 + }, + { + "text": "\"Since human blood has more salt than animal blood, once wild animals get the taste of salty blood they do not like other animals like deer,\" Dhakal said.", + "length": 154 + }, + { + "text": "The others are older children and a 29-year-old woman who had gone to collect fodder for domestic animals in the nearby forest, a common practice in Nepal.", + "length": 155 + }, + { + "text": "The grisly discovery, which came after teams of people searched for the child, marks the 15th victim in the past 15 months in that remote district in western Nepal.", + "length": 164 + }, + { + "text": "While cases of leopards killing domestic animals are common, and there are sometimes instances of leopards killing people in Nepal, this case is \"extreme,\" Dhakal said.", + "length": 168 + }, + { + "text": "\"In the case of the children it just leaves behind the head, eating everything, but some parts of the adult body are left behind because it cannot finish it,\" Kharel added.", + "length": 172 + }, + { + "text": "Kathmandu, Nepal (CNN) -- A ferocious leopard may have killed 15 people in Nepal in a 15-month span, its latest victim a 4-year-old boy that the creature dragged away into the jungle to eat.", + "length": 190 + }, + { + "text": "Maheshwor Dhakal, an ecologist at the Department of National Parks and Wildlife Conservation 
in Kathmandu, agreed that it is unusual to find more than one or two man-eating animals in one area.", + "length": 193 + }, + { + "text": "The head of boy was found in the forest a kilometer from his home Saturday morning, said Kamal Prasad Kharel, the police chief of the Baitadi district, an area about 600 kilometers (373 miles) west of Kathmandu.", + "length": 211 + }, + { + "text": "Kharel said he feared the actual number of people killed by the leopard could be higher than 15, because others have lost their life to leopard attacks in Uttarkhand state in northern India, which borders Baitadi district.", + "length": 222 + }, + { + "text": "The local administration has sought to raise public awareness of the dangers of going alone into nearby forests and has mobilized the police, armed police force and local people who have licensed guns to hunt for the animal.", + "length": 224 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6810291409492493 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:24.978087923Z", + "first_section_created": "2025-12-23T09:32:24.978454838Z", + "last_section_published": "2025-12-23T09:32:24.978642045Z", + "all_results_received": "2025-12-23T09:32:25.042742722Z", + "output_generated": "2025-12-23T09:32:25.042886027Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:24.978454838Z", + "publish_time": "2025-12-23T09:32:24.978642045Z", + "first_worker_start": "2025-12-23T09:32:24.979157966Z", + "last_worker_end": "2025-12-23T09:32:25.041841Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:24.979155266Z", + "start_time": "2025-12-23T09:32:24.979203668Z", + "end_time": "2025-12-23T09:32:24.97926857Z", + "queue_wait_time_ms": 
0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:24.979327Z", + "start_time": "2025-12-23T09:32:24.979457Z", + "end_time": "2025-12-23T09:32:25.041841Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:24.979122564Z", + "start_time": "2025-12-23T09:32:24.979191767Z", + "end_time": "2025-12-23T09:32:24.97926167Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:24.979088563Z", + "start_time": "2025-12-23T09:32:24.979157966Z", + "end_time": "2025-12-23T09:32:24.979203468Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3132, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff 
--git a/data/output/000ce9139d5bf974c2a621226b6ed77900bfa498.json b/data/output/000ce9139d5bf974c2a621226b6ed77900bfa498.json new file mode 100644 index 0000000..9148cf0 --- /dev/null +++ b/data/output/000ce9139d5bf974c2a621226b6ed77900bfa498.json @@ -0,0 +1,338 @@ +{ + "file_name": "000ce9139d5bf974c2a621226b6ed77900bfa498.txt", + "total_words": 682, + "top_n_words": [ + { + "word": "the", + "count": 37 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "bitcoin", + "count": 14 + }, + { + "word": "as", + "count": 10 + }, + { + "word": "currency", + "count": 10 + }, + { + "word": "for", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "org and Huobi.", + "length": 14 + }, + { + "text": "Exchange BTCTrade.", + "length": 18 + }, + { + "text": "Victoria Woollaston .", + "length": 21 + }, + { + "text": "and Canadian customers.", + "length": 23 + }, + { + "text": "Gox exchange went offline.", + "length": 26 + }, + { + "text": "Then in February 2014, Mt.", + "length": 26 + }, + { + "text": "It fell from $550 down to $418.", + "length": 31 + }, + { + "text": "In April last year, CoinLab sued Mt.", + "length": 36 + }, + { + "text": "As this graph from Coinbase demonstrates.", + "length": 41 + }, + { + "text": "Gox of not transferring them the existing U.", + "length": 44 + }, + { + "text": "Gox services went south after CoinLab accused Mt.", + "length": 49 + }, + { + "text": "Gox for $75 million (£44 million) for breach of contract.", + "length": 58 + }, + { + "text": "Many websites are now taking Bitcoins as a form of currency.", + "length": 60 + }, + { + "text": "This equates to a drop of almost 20 per cent in over 24 hours.", + "length": 62 + }, + { + "text": "This equates to a drop of almost 20 per cent in over 24 
hours.", + "length": 62 + }, + { + "text": "The coins first emerged in 2008 and launched as a network in 2009.", + "length": 66 + }, + { + "text": "com made its announcement on Thursday, followed shortly by BTC100.", + "length": 66 + }, + { + "text": "Gox exchange went offline after rumours $375 million-worth had been stolen.", + "length": 75 + }, + { + "text": "Bitcoin is a piece of data confined to an internet network by an algorithm.", + "length": 75 + }, + { + "text": "The drop follows a similar decline in Bitcoin value in February when the Mt.", + "length": 76 + }, + { + "text": "Chinese banks are clamping down on the currency after speculation about hacks and thefts .", + "length": 90 + }, + { + "text": "It has been dismissed by some as a Ponzi Scheme and touted by others as the future of money.", + "length": 92 + }, + { + "text": "Once released it can be traded and used like money online and can be purchased with real cash.", + "length": 94 + }, + { + "text": "As well as digital currency, Bitcoin miners enjoy the competitive nature of unlocking the coins.", + "length": 96 + }, + { + "text": "The partnership between the companies, agreeing that CoinLab would managed the North American Mt.", + "length": 97 + }, + { + "text": "Chinese banks are clamping down on the currency, however, after speculation over hacks and thefts.", + "length": 98 + }, + { + "text": "The drop follows a similar decline in Bitcoin value, stock image pictured, in February when the Mt.", + "length": 99 + }, + { + "text": "Rumours about hacks and theft caused problems for the online currency's public image, and prices plummeted by 22% .", + "length": 115 + }, + { + "text": "In its heyday, Bitcoin’s value soared passed $1,100 but just five months later, the currency’s price has plummeted.", + "length": 119 + }, + { + "text": "The price for a single Bitcoin almost quadrupled from its previous record high of $267 (£165) less than three weeks before.", + "length": 124 + }, + { + "text": "This 
is in sharp contrast to November, when Bitcoin passed the $1,000 (£613) mark for the first time since it was introduced in 2009.", + "length": 134 + }, + { + "text": "There are 21 million coins predicted to last until 2140 and their finite nature means they perform more like a commodity, such as gold.", + "length": 135 + }, + { + "text": "It is not centrally controlled and its unique and complex set-up means the market cannot be altered or hacked, according to the developers.", + "length": 139 + }, + { + "text": "Chinese trading accounts for a large volume of Bitcoin transactions, which fell to 31,849 this morning, down from 121,824 at the start of March.", + "length": 144 + }, + { + "text": "Chinese trading makes up a large volume of Bitcoin transactions, and the news saw its value drop to $350 - a loss of around $100 in a single day.", + "length": 145 + }, + { + "text": "The news saw the value of the currency drop to $350 (£209) - a loss of around $100 (£60), according to Bitcoin Average's price index, pictured .", + "length": 146 + }, + { + "text": "The rumoured theft and subsequent shutdown caused problems for the online currency's public and financial image, and prices plummeted by 22 per cent.", + "length": 149 + }, + { + "text": "Analysts at the time claimed the demand for Bitcoin followed increased awareness about the benefits of the currency, and a drive to move it into the mainstream.", + "length": 160 + }, + { + "text": "Earlier this week, Chinese banks sent notices to local Bitcoin exchanges stating their accounts would be frozen om April 15 as part of a wider national crackdown.", + "length": 162 + }, + { + "text": "Earlier this week, Chinese banks sent notices to local Bitcoin exchanges stating their accounts would be frozen on April 15 as part of a wider national crackdown.", + "length": 162 + }, + { + "text": "‘Interestingly, it appears Chinese banks started with smaller exchanges before working up to those with larger trading volumes,’ said Jon 
Southurst at CoinDesk.", + "length": 164 + }, + { + "text": "Gox suspended all withdrawals indefinitely, even after some traders had been waiting months for their withdrawals, causing investors to worry about the state of the currency.", + "length": 174 + }, + { + "text": "For example, in June 2011, hackers fraudulently dropped the worth of Bitcoins by 1 per cent after using a Bitcoin traders account to transfer Bitcoins and then sell them on, fluctuating the price.", + "length": 196 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4596993625164032 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:25.479445374Z", + "first_section_created": "2025-12-23T09:32:25.479859791Z", + "last_section_published": "2025-12-23T09:32:25.480163203Z", + "all_results_received": "2025-12-23T09:32:25.543441846Z", + "output_generated": "2025-12-23T09:32:25.543624254Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:25.479859791Z", + "publish_time": "2025-12-23T09:32:25.480163203Z", + "first_worker_start": "2025-12-23T09:32:25.480622822Z", + "last_worker_end": "2025-12-23T09:32:25.542538Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:25.480663323Z", + "start_time": "2025-12-23T09:32:25.480741526Z", + "end_time": "2025-12-23T09:32:25.48083743Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:25.480911Z", + "start_time": "2025-12-23T09:32:25.481036Z", + "end_time": "2025-12-23T09:32:25.542538Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:25.480640222Z", + "start_time": 
"2025-12-23T09:32:25.480712525Z", + "end_time": "2025-12-23T09:32:25.480816529Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:25.480538718Z", + "start_time": "2025-12-23T09:32:25.480622822Z", + "end_time": "2025-12-23T09:32:25.480676324Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4010, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/000cefd88b6f79d28c1fb220d5a3eed19514a462.json b/data/output/000cefd88b6f79d28c1fb220d5a3eed19514a462.json new file mode 100644 index 0000000..fa9ffc5 --- /dev/null +++ b/data/output/000cefd88b6f79d28c1fb220d5a3eed19514a462.json @@ -0,0 +1,242 @@ +{ + "file_name": "000cefd88b6f79d28c1fb220d5a3eed19514a462.txt", + "total_words": 278, + "top_n_words": [ + { + "word": 
"the", + "count": 11 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "at", + "count": 6 + }, + { + "word": "of", + "count": 6 + }, + { + "word": "an", + "count": 4 + }, + { + "word": "are", + "count": 4 + }, + { + "word": "children", + "count": 4 + }, + { + "word": "deaths", + "count": 4 + }, + { + "word": "it", + "count": 4 + }, + { + "word": "last", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "19:55 EST, 21 June 2013 .", + "length": 25 + }, + { + "text": "19:55 EST, 21 June 2013 .", + "length": 25 + }, + { + "text": "5 deaths – almost  double other major child cardiac units.", + "length": 61 + }, + { + "text": "According to its research, it equates to an annual rate of 7.", + "length": 61 + }, + { + "text": "According to its research, it equates to an annual rate of 7.", + "length": 61 + }, + { + "text": "Michelle Elliott, of Fragile Hearts, said: ‘How many more are going to die before something is done?", + "length": 102 + }, + { + "text": "’ Concerns about the unit led to operations being stopped in April while an urgent review was carried out.", + "length": 108 + }, + { + "text": "The deaths of 30 children at a controversial heart surgery unit are being investigated, it emerged last night.", + "length": 110 + }, + { + "text": "The group acted after discovering two more children died at Leeds last week, boys aged nine years and 18 months.", + "length": 112 + }, + { + "text": "A spokesman said 1,500 operations  were carried out in this period, meaning  a ‘normal’ death rate of 2 per cent.", + "length": 119 + }, + { + "text": "The latest deaths were not included in the figure of 30 for April 2009 to April 2013 confirmed by NHS England yesterday.", + "length": 120 + }, + { + "text": "Previously, Fragile 
Hearts had agreed to an NHS request not to publicly reveal the total while the investigation continued.", + "length": 123 + }, + { + "text": "Family support group Fragile Hearts went public with the fatality levels at Leeds General Infirmary over the past four years.", + "length": 125 + }, + { + "text": "If they are investigating, we cannot understand why they are being allowed to continue surgery when two children died last week.", + "length": 128 + }, + { + "text": "5 deaths ¿ almost double other major child cardiac units (stock image) Surgery at the unit was briefly suspended earlier this year.", + "length": 132 + }, + { + "text": "Surgery at the unit was briefly suspended earlier this year after it was discovered that two more children died at Leeds last week, boys aged nine years and 18 months .", + "length": 168 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5282045602798462 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:25.981123739Z", + "first_section_created": "2025-12-23T09:32:25.981466252Z", + "last_section_published": "2025-12-23T09:32:25.981628659Z", + "all_results_received": "2025-12-23T09:32:26.050380922Z", + "output_generated": "2025-12-23T09:32:26.050493827Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:25.981466252Z", + "publish_time": "2025-12-23T09:32:25.981628659Z", + "first_worker_start": "2025-12-23T09:32:25.982103178Z", + "last_worker_end": "2025-12-23T09:32:26.047466Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:25.982134879Z", + "start_time": "2025-12-23T09:32:25.982196082Z", + "end_time": "2025-12-23T09:32:25.982243384Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + 
"worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:25.982326Z", + "start_time": "2025-12-23T09:32:25.982521Z", + "end_time": "2025-12-23T09:32:26.047466Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:25.982086677Z", + "start_time": "2025-12-23T09:32:25.98215608Z", + "end_time": "2025-12-23T09:32:25.982206582Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:25.982045476Z", + "start_time": "2025-12-23T09:32:25.982103178Z", + "end_time": "2025-12-23T09:32:25.98214298Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1647, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git 
a/data/output/000e009f6b1d954d827c9a550f3f24a5474ee82b.json b/data/output/000e009f6b1d954d827c9a550f3f24a5474ee82b.json new file mode 100644 index 0000000..e748baf --- /dev/null +++ b/data/output/000e009f6b1d954d827c9a550f3f24a5474ee82b.json @@ -0,0 +1,548 @@ +{ + "file_name": "000e009f6b1d954d827c9a550f3f24a5474ee82b.txt", + "total_words": 1385, + "top_n_words": [ + { + "word": "the", + "count": 73 + }, + { + "word": "to", + "count": 52 + }, + { + "word": "he", + "count": 38 + }, + { + "word": "and", + "count": 34 + }, + { + "word": "of", + "count": 34 + }, + { + "word": "white", + "count": 32 + }, + { + "word": "a", + "count": 31 + }, + { + "word": "his", + "count": 28 + }, + { + "word": "that", + "count": 24 + }, + { + "word": "in", + "count": 23 + } + ], + "sorted_sentences": [ + { + "text": "...", + "length": 3 + }, + { + "text": "He was dead.", + "length": 12 + }, + { + "text": "\" he told NPR.", + "length": 14 + }, + { + "text": "It was too late.", + "length": 16 + }, + { + "text": "A soldier, changed .", + "length": 20 + }, + { + "text": "\" But he kept focused.", + "length": 22 + }, + { + "text": "White had to see, he said.", + "length": 26 + }, + { + "text": "just the amount of fire ...", + "length": 27 + }, + { + "text": "That soldier would survive.", + "length": 27 + }, + { + "text": "He would carry out his duty.", + "length": 28 + }, + { + "text": "White crawled toward the man.", + "length": 29 + }, + { + "text": "See Kyle White's Army profile .", + "length": 31 + }, + { + "text": "Risking death, again and again .", + "length": 32 + }, + { + "text": "White figured he would be killed.", + "length": 33 + }, + { + "text": "I'm not gonna make it through this.", + "length": 35 + }, + { + "text": "Finally, maybe, there could be hope.", + "length": 36 + }, + { + "text": "White ran toward him, braving enemy fire.", + "length": 41 + }, + { + "text": "That is why I wear this medal for my team.", + "length": 42 + }, + { + "text": "\"And now it's time for America 
to do ours.", + "length": 42 + }, + { + "text": "But he would do what he was trained to do.", + "length": 42 + }, + { + "text": "\" But something inside him changed, he said.", + "length": 44 + }, + { + "text": "\"It was never a choice,\" he explained to CNN.", + "length": 45 + }, + { + "text": "The shrapnel and rock fragments cut his face.", + "length": 45 + }, + { + "text": "CNN's Barbara Starr contributed to this report.", + "length": 47 + }, + { + "text": "Dazed, he struggled to take in what was happening.", + "length": 50 + }, + { + "text": "\" Read the transcript of the White House ceremony .", + "length": 51 + }, + { + "text": "But then a friendly mortar round landed near White.", + "length": 51 + }, + { + "text": "But it's something, as time goes on, it gets easier.", + "length": 52 + }, + { + "text": "But the man's injuries were too severe, and he died.", + "length": 52 + }, + { + "text": "\"They were heading into an area known as ambush alley.", + "length": 54 + }, + { + "text": "White was able to drag the wounded man back to the tree.", + "length": 56 + }, + { + "text": "24 minority veterans receive long overdue Medal of Honor .", + "length": 58 + }, + { + "text": "\" And it hasn't entirely gone away, all these years later.", + "length": 58 + }, + { + "text": "\" White first returned home and trained other paratrooopers.", + "length": 60 + }, + { + "text": "Then two shots, then the echo, then fully automatic gunfire.", + "length": 60 + }, + { + "text": "\"It's something you still think about every day,\" White said.", + "length": 61 + }, + { + "text": "Service members deserve a leader who is all in, he explained.", + "length": 61 + }, + { + "text": "\"I still have these images from that day burned into my head.", + "length": 61 + }, + { + "text": "They had done everything their country could ask for and more.", + "length": 62 + }, + { + "text": "\" Still, the President said that he deserved to be singled out.", + "length": 63 + }, + { + "text": 
"\"Without the team,\" he said, \"there could be no Medal of Honor.", + "length": 63 + }, + { + "text": "\"Kyle, members of Chosen Company, you did your duty,\" Obama said.", + "length": 65 + }, + { + "text": "And I vow to live up to the responsibility of doing so,\" White said.", + "length": 68 + }, + { + "text": "\" As such, White couldn't help but think about his brothers in arms.", + "length": 68 + }, + { + "text": "\"And that was pretty much the reason why I decided to leave the Army.", + "length": 69 + }, + { + "text": "\" Attacked in 'ambush alley' On Tuesday, White dressed in full uniform.", + "length": 71 + }, + { + "text": "\"Even to this day, you know, I can't say if it was something good or bad.", + "length": 73 + }, + { + "text": "Not long before, Obama recalled White's bravery and that of his colleagues.", + "length": 75 + }, + { + "text": "\"I told myself from the beginning that I was going to be killed, you know...", + "length": 76 + }, + { + "text": "Obama called him on February 10 to tell him he'd be given the Medal of Honor.", + "length": 77 + }, + { + "text": "When help arrived, he told his rescuers to put the other wounded aboard first.", + "length": 78 + }, + { + "text": "Moments after he came to, an enemy round hit a rock just inches from his head.", + "length": 78 + }, + { + "text": "It was at that point in the attack that White realized his radio wasn't working.", + "length": 80 + }, + { + "text": "\" Taking so much fire, members of his patrol were separated as they tried to take cover.", + "length": 88 + }, + { + "text": "He and four others had been separated from the other soldiers, who'd jumped from a cliff.", + "length": 89 + }, + { + "text": "When it came time for White to re-enlist, he thought hard about whether doing so felt right.", + "length": 92 + }, + { + "text": "The President paid tribute to those who died that fall day in Afghanistan and those who survived.", + "length": 97 + }, + { + "text": "White administered first aid to 
one wounded soldier using the only cover available: a single tree.", + "length": 98 + }, + { + "text": "It was unacceptable to him to continue in the service and then, perhaps, be deployed to Afghanistan.", + "length": 100 + }, + { + "text": "again, this time in the knee, so the White wrapped his belt around the man's leg, creating a tourniquet.", + "length": 104 + }, + { + "text": "He's the 10th recipient of that award for his actions in Afghanistan, and the seventh surviving recipient.", + "length": 106 + }, + { + "text": "He decided against it because he doubted that he could devote his complete heart and mind to it, he told NPR.", + "length": 109 + }, + { + "text": "Then White found a working radio on a deceased comrade and called for artillery and helicopter gunships to help.", + "length": 112 + }, + { + "text": "He looked out and saw a member of his patrol about 30 feet away whose wounds were so bad that he could not move.", + "length": 112 + }, + { + "text": "Suffering a concussion, White managed to hang on, waiting for helicopters to evacuate him and others with him that day.", + "length": 119 + }, + { + "text": "\"I remember just red hot chunks of metal like the size of my palm just flinging by your head,\" he told Stars and Stripes.", + "length": 121 + }, + { + "text": "The Washington state native joined up after high school, following the lead of his father, a former Army Special Forces member.", + "length": 127 + }, + { + "text": "He was there on November 9, 2007, walking back from a meeting with elders with his unit of 14 and a squad of Afghan army soldiers.", + "length": 130 + }, + { + "text": "Speaking with National Public Radio this week, White said the experience -- from the violence to the wait -- seemed like \"forever.", + "length": 130 + }, + { + "text": "They both represent his family on that day six years ago -- the seven others who, like him, survived as well as those who did not.", + "length": 130 + }, + { + "text": "White continued to risk 
himself to help his fellow warriors, again running from cover into enemy fire to reach the platoon leader.", + "length": 130 + }, + { + "text": "His service had, like many other members of the military, earned him a ticket to Afghanistan as his platoon's radio telephone operator.", + "length": 135 + }, + { + "text": "Speaking minutes after President Barack Obama gave him the highest military honor, White insisted the two emblems are equally significant.", + "length": 138 + }, + { + "text": "White was finishing off his first magazine and beginning to load another one when an rocket-propelled grenade exploded, knocking him unconscious.", + "length": 145 + }, + { + "text": "He aced it, and in doing so represented the best of what Obama called the \"9/11 generation (which) has proven itself to be one of America's greatest.", + "length": 149 + }, + { + "text": "Four service members received the Medal of Honor -- all posthumously -- for actions in the war in Iraq, according to the Congressional Medal of Honor Society.", + "length": 158 + }, + { + "text": "White told the military publication Stars and Stripes that he could see the leader's helmet and assault pack, but he couldn't tell whether the leader was alive.", + "length": 160 + }, + { + "text": "\" White himself insisted that the Medal of Honor cannot really be an individual award, calling it \"a testament to the trust we have in each other and our leaders.", + "length": 162 + }, + { + "text": "\"They knew not to stop, they had to keep moving,\" Obama recalled of the group walking single-file with a cliff to their right and a steep, rocky slope to their left.", + "length": 165 + }, + { + "text": "The former Army sergeant said Tuesday he owes it to these men, whom he calls \"my heroes,\" to live his life well, even now that he's left the military, and with honor.", + "length": 166 + }, + { + "text": "\" In an interview prior to the award ceremony, White told CNN how the group walked \"down this little incline and 
looking into the valley, (when) I hear this single shot.", + "length": 169 + }, + { + "text": "\" \"Today,\" the President said to a crowd that included White, his parents and many of his former comrades, \"we pay tribute to a soldier who embodies the courage of his generation.", + "length": 179 + }, + { + "text": "A high school freshman when the Twin Towers fell on September 11, 2001, White joined the Army and was just 20 years old and 21 months into his military service when he faced the ultimate test.", + "length": 192 + }, + { + "text": "But on most other weekdays, he now wears a suit to his job as an investment analyst at a bank in Charlotte, North Carolina -- a job that he's admitted to Obama, with a laugh, is less exciting than his previous job in the Army.", + "length": 226 + }, + { + "text": "\"Though I am still uncomfortable with hearing my name and the word 'hero' in the same sentence, I am now ready for the challenge of proudly wearing this piece of blue fabric and carved metal with the same reverence that I wear the bracelet.", + "length": 240 + }, + { + "text": "(CNN) -- Kyle White now has two pieces of metal to wear -- one, a bracelet inscribed with the names of his six comrades killed in an ambush in Afghanistan, the other, a Medal of Honor given to him for his valor that ensured that death toll wasn't higher.", + "length": 254 + }, + { + "text": "In a brief statement to reporters after Tuesday's ceremony, White called the Medal of Honor \"a symbol of the responsibility all soldiers knowingly face when they depart for distant lands in defense of the nation, a responsibility that locks us all in the bonds of brotherhood.", + "length": 276 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4305161386728287 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:26.482406487Z", + "first_section_created": "2025-12-23T09:32:26.484415368Z", + "last_section_published": "2025-12-23T09:32:26.484804483Z", + 
"all_results_received": "2025-12-23T09:32:26.583097234Z", + "output_generated": "2025-12-23T09:32:26.583341444Z", + "total_processing_time_ms": 100, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 98, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:26.484415368Z", + "publish_time": "2025-12-23T09:32:26.484666878Z", + "first_worker_start": "2025-12-23T09:32:26.485257102Z", + "last_worker_end": "2025-12-23T09:32:26.582148Z", + "total_journey_time_ms": 97, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:26.4852193Z", + "start_time": "2025-12-23T09:32:26.485302603Z", + "end_time": "2025-12-23T09:32:26.48545371Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:26.485466Z", + "start_time": "2025-12-23T09:32:26.48561Z", + "end_time": "2025-12-23T09:32:26.582148Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 96 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:26.485265102Z", + "start_time": "2025-12-23T09:32:26.485328704Z", + "end_time": "2025-12-23T09:32:26.485484311Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:26.485176998Z", + "start_time": "2025-12-23T09:32:26.485257102Z", + "end_time": "2025-12-23T09:32:26.485414708Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:26.48471938Z", + "publish_time": "2025-12-23T09:32:26.484804483Z", + "first_worker_start": "2025-12-23T09:32:26.485167498Z", + "last_worker_end": "2025-12-23T09:32:26.562933Z", + "total_journey_time_ms": 78, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:26.485312604Z", + "start_time": 
"2025-12-23T09:32:26.485357706Z", + "end_time": "2025-12-23T09:32:26.485424908Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:26.485405Z", + "start_time": "2025-12-23T09:32:26.485535Z", + "end_time": "2025-12-23T09:32:26.562933Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 77 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:26.485260502Z", + "start_time": "2025-12-23T09:32:26.485320404Z", + "end_time": "2025-12-23T09:32:26.485449709Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:26.485102495Z", + "start_time": "2025-12-23T09:32:26.485167498Z", + "end_time": "2025-12-23T09:32:26.4852076Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 173, + "min_processing_ms": 77, + "max_processing_ms": 96, + "avg_processing_ms": 86, + "median_processing_ms": 96, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + 
"average_section_size": 3814, + "slowest_section_id": 0, + "slowest_section_time_ms": 97 + } +} diff --git a/data/output/000e04bb1b0e8c07dcb89c5ee3ab88d815a7088e.json b/data/output/000e04bb1b0e8c07dcb89c5ee3ab88d815a7088e.json new file mode 100644 index 0000000..ece8303 --- /dev/null +++ b/data/output/000e04bb1b0e8c07dcb89c5ee3ab88d815a7088e.json @@ -0,0 +1,358 @@ +{ + "file_name": "000e04bb1b0e8c07dcb89c5ee3ab88d815a7088e.txt", + "total_words": 542, + "top_n_words": [ + { + "word": "the", + "count": 43 + }, + { + "word": "and", + "count": 16 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "train", + "count": 13 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "paris", + "count": 12 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "workers", + "count": 8 + }, + { + "word": "bill", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Tuesday.", + "length": 8 + }, + { + "text": "concerns.", + "length": 9 + }, + { + "text": "competition.", + "length": 12 + }, + { + "text": "Travelmail Reporter .", + "length": 21 + }, + { + "text": "and on commuter lines.", + "length": 22 + }, + { + "text": "state-run railway system.", + "length": 25 + }, + { + "text": "protesters to the ground.", + "length": 25 + }, + { + "text": "demonstration continued peacefully.", + "length": 35 + }, + { + "text": "The protesters retreated and their .", + "length": 36 + }, + { + "text": "demanding that the bill be delayed or changed.", + "length": 46 + }, + { + "text": "And the chaos shows no signs of slowing down as .", + "length": 49 + }, + { + "text": "About a third of trains were canceled nationwide .", + "length": 50 + }, + { + "text": "' Lawmakers begin debating the bill Tuesday afternoon.", + "length": 54 + }, + { + "text": "up for full-scale railway liberalization in coming years.", + "length": 57 + }, + { + "text": "Several hundred workers staged a protest Tuesday near the .", 
+ "length": 59 + }, + { + "text": "Workers fear the reform will mean job losses and security .", + "length": 59 + }, + { + "text": "Prime Minister Manuel Valls broke with the tradition that .", + "length": 59 + }, + { + "text": "network, which would pave the way to opening up railways to .", + "length": 61 + }, + { + "text": "Fed up: French commuters face traffic chaos amid the strike .", + "length": 61 + }, + { + "text": "The strike has not affected international lines such as the .", + "length": 61 + }, + { + "text": "The protesters blocked cars and tried to push past police to .", + "length": 62 + }, + { + "text": "The government says the reform is needed to create a stronger .", + "length": 63 + }, + { + "text": "Later Tuesday, French television showed footage of protesters .", + "length": 63 + }, + { + "text": "The strike began last Wednesday, and while only a minority of .", + "length": 63 + }, + { + "text": "links travelers with cities across western and southwest France.", + "length": 64 + }, + { + "text": "Officers  responded with tear gas and batons and wrestled a few .", + "length": 66 + }, + { + "text": "The bill would unite the SNCF train operator with the RFF railway .", + "length": 67 + }, + { + "text": "National Assembly on Paris' Left Bank, waving red union flags and .", + "length": 67 + }, + { + "text": "approach the parliament building, firing flares and throwing bottles.", + "length": 69 + }, + { + "text": "Eurostar train from Paris to London, but it has caused problems for .", + "length": 69 + }, + { + "text": "Deserted: A passenger waits at Gare de L'Est train station in Paris .", + "length": 69 + }, + { + "text": "marching onto train tracks at Paris' busy Montparnasse station, which .", + "length": 71 + }, + { + "text": "the reform bill goes to the lower house of Parliament for debate today.", + "length": 71 + }, + { + "text": "structure for the railways, as France and other European countries gear .", + "length": 73 + }, + { + "text": 
"workers are taking part, it has disrupted travel on trains across France .", + "length": 74 + }, + { + "text": "Angry train staff hurled bottles and blocked traffic over a bill to reform the .", + "length": 80 + }, + { + "text": "Stranded: Paris commuters pile out of one of the few trains available today in Paris .", + "length": 86 + }, + { + "text": "international travelers using the commuter rail to and from Paris' Charles de Gaulle Airport.", + "length": 93 + }, + { + "text": "Controversy: The proposed bill would unite the SNCF train operator with the RFF railway network .", + "length": 97 + }, + { + "text": "Clash: Riot police forces hold down a striking worker of the French state-run rail operator SNCF .", + "length": 98 + }, + { + "text": "Delays: Travellers being helped by SNCF assistance at the Paris-Austerlitz train station in Paris .", + "length": 99 + }, + { + "text": "Shocking scenes: A man takes photos while walking in the smoke of a flare during the demonstrations .", + "length": 101 + }, + { + "text": "Hellish: Commuters arrive at the Gare de l'Est train station by one of a few trains available in Paris .", + "length": 104 + }, + { + "text": "governments don't meddle in strikes, saying Monday that the movement was 'not useful and not responsible.", + "length": 105 + }, + { + "text": "United: Striking train workers, some wearing bibs of the Workers' Force union, shout anti government slogans .", + "length": 110 + }, + { + "text": "Clash: French riot police fire tear gas at train workers as they protest a bill to reform the Paris rail system .", + "length": 113 + }, + { + "text": "Long wait: Passengers try to get comfortable at the Gare de Lyon train station in Paris as they face long delays .", + "length": 114 + }, + { + "text": "Violent: Train workers and armed officers clash on the streets over the proposed change to laws relating to the state-run railway network .", + "length": 139 + }, + { + "text": "French riot police clashed with train workers 
during a week long strike, causing some of the worst disruption to the country's rail network in years.", + "length": 149 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.9127382040023804 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:26.985581007Z", + "first_section_created": "2025-12-23T09:32:26.985918721Z", + "last_section_published": "2025-12-23T09:32:26.986181332Z", + "all_results_received": "2025-12-23T09:32:27.050632422Z", + "output_generated": "2025-12-23T09:32:27.050798928Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:26.985918721Z", + "publish_time": "2025-12-23T09:32:26.986181332Z", + "first_worker_start": "2025-12-23T09:32:26.986793256Z", + "last_worker_end": "2025-12-23T09:32:27.049693Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:26.986722253Z", + "start_time": "2025-12-23T09:32:26.986793256Z", + "end_time": "2025-12-23T09:32:26.986856959Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:26.987147Z", + "start_time": "2025-12-23T09:32:26.987281Z", + "end_time": "2025-12-23T09:32:27.049693Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:26.986768555Z", + "start_time": "2025-12-23T09:32:26.986836658Z", + "end_time": "2025-12-23T09:32:26.986935262Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:26.986766055Z", + "start_time": "2025-12-23T09:32:26.986842958Z", + "end_time": "2025-12-23T09:32:26.986961163Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3360, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/000e0f3e7d5f803ac90c923d215462b5330b24c8.json b/data/output/000e0f3e7d5f803ac90c923d215462b5330b24c8.json new file mode 100644 index 0000000..5a28dd9 --- /dev/null +++ b/data/output/000e0f3e7d5f803ac90c923d215462b5330b24c8.json @@ -0,0 +1,242 @@ +{ + "file_name": "000e0f3e7d5f803ac90c923d215462b5330b24c8.txt", + "total_words": 464, + "top_n_words": [ + { + "word": "the", + "count": 39 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "a", + "count": 8 + }, + { + "word": "nsw", + "count": 8 + }, + { + "word": "be", + "count": 7 + }, + { + "word": "casino", + "count": 6 + }, + { + "word": "government", + 
"count": 6 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Daniel Mills for Daily Mail Australia .", + "length": 39 + }, + { + "text": "James Packer's Barangaroo casino is set to open in November 2019 .", + "length": 66 + }, + { + "text": "He said its disclosure would subject the related parties to 'potential harm.", + "length": 76 + }, + { + "text": "' A considerable number of pages of the report have also been redacted or blacked-out.", + "length": 86 + }, + { + "text": "It will begin operating on November 15, 2019 after the NSW government backed its proposal.", + "length": 90 + }, + { + "text": "' The Greens will move to subpoena the redacted information during a sitting of parliament next week.", + "length": 102 + }, + { + "text": "'It was the Authority’s view the public interest in its disclosure did not outweigh that potential harm.", + "length": 106 + }, + { + "text": "The NSW Government has decided to keep casino details public citing their disclosure would damage its license .", + "length": 111 + }, + { + "text": "Greens Upper House MP John Kaye, left,  will move to make the 'hidden' documents public during parliament next week .", + "length": 118 + }, + { + "text": "' A NSW Independent Office Of Gaming spokesman has defended the confidential information, citing a lack of public interest.", + "length": 123 + }, + { + "text": "He said this latest report was another example of the Liberals doing 'doing more back-room deals with corporate donors and mates.", + "length": 129 + }, + { + "text": "'Next week, the Greens will be moving to subpoena the missing documents, using the upper House call-for-papers process,' he said.", + "length": 129 + }, + { + "text": "Details regarding James Packer's $2 billion Sydney casino have been kept secret in a NSW Government report outlining how it will be run.", + "length": 137 + }, + { + "text": "' In July, Crown Resorts was granted a licence to operate a VIP-only restricted gaming 
facility, without poker machines, at Barangaro in Sydney.", + "length": 144 + }, + { + "text": "Greens Upper House MP John Kaye has accused the NSW Government of 'moving into very dangerous territory' considering the latest ICAC investigations which resulted in the resignations of a number NSW Liberal MPs.", + "length": 211 + }, + { + "text": "Eight of the agreements between the parties were to be 'executed contemporaneously' when the casino licence was issued on July 8, but only seven of those have been made public in the 'VIP Gaming Management Agreement.", + "length": 216 + }, + { + "text": "'It will be up to the Labor Party and Fred Nile’s Christian Democrats to decide whether they support an open and accountable process or are happy to let the government cut side deals with an anonymous corporate entity.", + "length": 220 + }, + { + "text": "Contained in the report are agreements between Packer's Crown Resorts and the NSW government, and the rules set out for the casino including who may enter, the types of games which may be played, minimum bet limits and tax rates.", + "length": 229 + }, + { + "text": "Twelve pages and one key stakeholder agreement have been kept confidential by the author - The NSW Independent Liquor and Gaming Authority- which claim their disclosure would be 'commercially damaging' to the operations of the casino licence.", + "length": 242 + }, + { + "text": "' 'The information redacted in the VIP Gaming Management Agreement document would, in the view of the Authority, not promote the objects of the relevant Act and be commercially damaging to the licensee or related entities if released,' he said.", + "length": 244 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7184246182441711 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:27.486992557Z", + "first_section_created": "2025-12-23T09:32:27.487289269Z", + "last_section_published": "2025-12-23T09:32:27.487519378Z", + 
"all_results_received": "2025-12-23T09:32:27.548277719Z", + "output_generated": "2025-12-23T09:32:27.548426625Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:27.487289269Z", + "publish_time": "2025-12-23T09:32:27.487519378Z", + "first_worker_start": "2025-12-23T09:32:27.488139703Z", + "last_worker_end": "2025-12-23T09:32:27.547332Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:27.488210006Z", + "start_time": "2025-12-23T09:32:27.488270208Z", + "end_time": "2025-12-23T09:32:27.48831321Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:27.48836Z", + "start_time": "2025-12-23T09:32:27.488505Z", + "end_time": "2025-12-23T09:32:27.547332Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:27.488178704Z", + "start_time": "2025-12-23T09:32:27.488232406Z", + "end_time": "2025-12-23T09:32:27.488293409Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:27.488086701Z", + "start_time": "2025-12-23T09:32:27.488139703Z", + "end_time": "2025-12-23T09:32:27.488184304Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + 
"max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2837, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/000e7f26e4dda517050bc09ee8d9a08a3b2334b7.json b/data/output/000e7f26e4dda517050bc09ee8d9a08a3b2334b7.json new file mode 100644 index 0000000..fbdd661 --- /dev/null +++ b/data/output/000e7f26e4dda517050bc09ee8d9a08a3b2334b7.json @@ -0,0 +1,310 @@ +{ + "file_name": "000e7f26e4dda517050bc09ee8d9a08a3b2334b7.txt", + "total_words": 514, + "top_n_words": [ + { + "word": "to", + "count": 17 + }, + { + "word": "the", + "count": 16 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "she", + "count": 12 + }, + { + "word": "melissa", + "count": 11 + }, + { + "word": "they", + "count": 11 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "with", + "count": 8 + }, + { + "word": "at", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Miss .", + "length": 6 + }, + { + "text": "5million.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "5million .", + "length": 10 + }, + { + "text": "Childhood .", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "even reached security.", + 
"length": 22 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "19:07 EST, 19 August 2013 .", + "length": 27 + }, + { + "text": "09:33 EST, 20 August 2013 .", + "length": 27 + }, + { + "text": "Louise Eccles In Lima, Peru .", + "length": 29 + }, + { + "text": "demands of violent drugs lords.", + "length": 31 + }, + { + "text": "They were arrested as they walked .", + "length": 35 + }, + { + "text": "Now, details have emerged that shed .", + "length": 37 + }, + { + "text": "The women face up to 15 years in jail.", + "length": 38 + }, + { + "text": "from the flat they shared at San Antonio Bay in Ibiza.", + "length": 54 + }, + { + "text": "A judge will decide today whether they should be charged.", + "length": 57 + }, + { + "text": "friend Rebecca Hughes received a Facebook message from Melissa – with .", + "length": 73 + }, + { + "text": "towards the check-in desks at Lima airport on August 6, before they had .", + "length": 73 + }, + { + "text": "Hughes, a former sales assistant, told Melissa’s father William Reid, .", + "length": 73 + }, + { + "text": "the wrong crowd on the Spanish party island and was unable to refuse the .", + "length": 74 + }, + { + "text": "whom she travelled to Spain – a few days after she abruptly disappeared .", + "length": 75 + }, + { + "text": "new light on Melissa’s extraordinary claims – and suggest she fell into .", + "length": 77 + }, + { + "text": "53: ‘She sent me a message saying, “I can’t back out now, they will kill me”.", + "length": 85 + }, + { + "text": "‘It sounds like she has been mixing with undesirables and one thing has led to another.", + "length": 89 + }, + { + "text": "‘I am grateful that she is alive at least – things could have been a whole lot worse,' he said.", + "length": 99 + }, + { + "text": "They were arrested at the airport in Lima, the Peruvian capital, with 11kg of the Class A drug, worth £1.", + "length": 106 + }, + { + "text": "Melissa and Michaella continued to be questioned 
at high-security Dirandro police station, in Lima, yesterday.", + "length": 110 + }, + { + "text": "Melissa Reid (right), 19, and Irish Michaella McCollum, 20, were arrested at the airport in Lima with 11kg of cocaine, worth £1.", + "length": 129 + }, + { + "text": "Rebecca Hughes (right), said childhood friend Melissa Reid (left) sent her a Facebook message saying she could not back out or she would be killed .", + "length": 148 + }, + { + "text": "If they had been caught with even one more person, they would have faced up to 25 years, because prosecutors assume that groups of three or more are in a cartel.", + "length": 161 + }, + { + "text": "The British girl held for drug smuggling in Peru reportedly sent a desperate message to a friend before her arrest, saying: ‘I can’t back out now, they will kill me.", + "length": 169 + }, + { + "text": "’ Melissa Reid claims she and Michaella McCollum Connolly, both 20, were kidnapped in Ibiza, threatened at gunpoint and forced to fly to Peru to act as drug mules for a Colombian gang.", + "length": 186 + }, + { + "text": "’ She did not want to disclose her full conversation with Melissa but told him: ‘I was going to go to the police but Melissa asked me not to because she said it could endanger her and get her in trouble with the guys.", + "length": 221 + }, + { + "text": "’ Mr Reid, a gas company manager who flew to Peru last Wednesday, said the new information gave him fresh insight into how his ‘beautiful, intelligent’ daughter got caught up in a drugs cartel only a month after arriving in Ibiza, where she was working.", + "length": 259 + }, + { + "text": "Melissa's father William (pictured together in Lima) said his daughter has been mixing with 'undesirables' Melissa, from Lenzie, Glasgow and Michaella, from Dungannon, Northern Ireland, claim the Colombian mafia threatened to harm them and their families if they went to the police or alerted airport staff.", + "length": 307 + } + ], + "sentiment": { + "sentiment": 
"negative", + "score": 0.6333285570144653 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:27.988306002Z", + "first_section_created": "2025-12-23T09:32:27.989821763Z", + "last_section_published": "2025-12-23T09:32:27.990027171Z", + "all_results_received": "2025-12-23T09:32:28.059662269Z", + "output_generated": "2025-12-23T09:32:28.059883578Z", + "total_processing_time_ms": 71, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:27.989821763Z", + "publish_time": "2025-12-23T09:32:27.990027171Z", + "first_worker_start": "2025-12-23T09:32:27.990632195Z", + "last_worker_end": "2025-12-23T09:32:28.058693Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:27.990695298Z", + "start_time": "2025-12-23T09:32:27.9907627Z", + "end_time": "2025-12-23T09:32:27.990829703Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:27.990904Z", + "start_time": "2025-12-23T09:32:27.991057Z", + "end_time": "2025-12-23T09:32:28.058693Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:27.990588693Z", + "start_time": "2025-12-23T09:32:27.990671697Z", + "end_time": "2025-12-23T09:32:27.991286922Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:27.990550992Z", + "start_time": "2025-12-23T09:32:27.990632195Z", + "end_time": "2025-12-23T09:32:27.990695698Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2991, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/000efdbb001fd19666b37456e239c78c52908655.json b/data/output/000efdbb001fd19666b37456e239c78c52908655.json new file mode 100644 index 0000000..caf5414 --- /dev/null +++ b/data/output/000efdbb001fd19666b37456e239c78c52908655.json @@ -0,0 +1,432 @@ +{ + "file_name": "000efdbb001fd19666b37456e239c78c52908655.txt", + "total_words": 891, + "top_n_words": [ + { + "word": "the", + "count": 53 + }, + { + "word": "to", + "count": 29 + }, + { + "word": "of", + "count": 25 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "a", + "count": 16 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "people", + "count": 15 + }, + { + "word": "will", + "count": 13 + }, + { + "word": "is", + "count": 12 + }, + { + "word": "that", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, 
+ { + "text": "'The .", + "length": 6 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Anna Edwards .", + "length": 14 + }, + { + "text": "Duncan Smith`s face.", + "length": 20 + }, + { + "text": "It is a disgrace that .", + "length": 23 + }, + { + "text": "11:58 EST, 30 March 2013 .", + "length": 26 + }, + { + "text": "12:04 EST, 30 March 2013 .", + "length": 26 + }, + { + "text": "That's how easy it happens.", + "length": 27 + }, + { + "text": "Will they be a skiver the next day?", + "length": 35 + }, + { + "text": "'Instead, the UK Government went ahead .", + "length": 40 + }, + { + "text": "this government chose to attack disabled people.", + "length": 48 + }, + { + "text": "and implemented that policy completely unchanged.", + "length": 49 + }, + { + "text": "house a disabled person, rising to four-fifths in Scotland.", + "length": 59 + }, + { + "text": "'One person can be a striver one day and then get made redundant.", + "length": 65 + }, + { + "text": "group Inclusion Scotland, said two-thirds of UK properties affected .", + "length": 69 + }, + { + "text": "' Protestors gather in Trafalgar Square to protest against the policy.", + "length": 70 + }, + { + "text": "insult was adopted liberally at today's march, with protesters wearing .", + "length": 72 + }, + { + "text": "purpose of an equality impact assessment is to find out and then change .", + "length": 73 + }, + { + "text": "t-shirts and waving placards with the word 'ratbag' superimposed over Mr .", + "length": 74 + }, + { + "text": "Some hid their faces behind masks, which have been used by internet hackers.", + "length": 76 + }, + { + "text": "Kelly Parry, representing the National Union of Students, said: 'He is a ratbag.", + "length": 80 + }, + { + "text": "The cuts that are going to come in are going to disproportionately affect women.", + "length": 80 + }, + { + "text": "your policy so it doesn't disproportionately impact on 
the vulnerable,' he said.", + "length": 80 + }, + { + "text": "The changes are an attempt to encourage those who have spare bedrooms to downsize.", + "length": 82 + }, + { + "text": "Protestors called Iain Duncan Smith a 'ratbag' Bill Scott, from disability campaign .", + "length": 85 + }, + { + "text": "People took to the streets of London, Glasgow and Edinburgh to complain about the tax.", + "length": 86 + }, + { + "text": "Angry protestors paraded through London, days before the new cut will come into effect .", + "length": 88 + }, + { + "text": "It will hit 660,000 households with an average loss of £14 per week, according to Crisis.", + "length": 90 + }, + { + "text": "'This government is killing disabled people, and we must stand together and say enough is enough.", + "length": 97 + }, + { + "text": "poorest households face a bleak April as they struggle to budget for all these cuts coming at once.", + "length": 99 + }, + { + "text": "'We need to fight back so that never again will the people of this country be robbed by Westminster.", + "length": 100 + }, + { + "text": "Thousands protested in Trafalgar Square as charity Crisis warned that the cuts would be devastating .", + "length": 101 + }, + { + "text": "People are already cutting back on the essentials of food and heating but there is only so much they can do.", + "length": 108 + }, + { + "text": "Queues at food banks are poised to get longer and homelessness is expected to increase, according to Crisis.", + "length": 108 + }, + { + "text": "Restrictions on the uprating of a number of welfare payments will also hit millions of households, it warned.", + "length": 109 + }, + { + "text": "Disability rights activist Susan Archibald, in Edinburgh, said: 'We have heard talk about \"strivers and skivers\".", + "length": 113 + }, + { + "text": "Hundreds of people took to Edinburgh's streets to protest against the introduction of the so-called Bedroom Tax .", + "length": 113 + }, + { + "text": "Pensioners are 
protected under the changes but that is expected to mean a bigger burden on poor working age adults.", + "length": 115 + }, + { + "text": "Protesters took to the streets in Georges Square, Glasgow, to take part in the 'Axe the bedroom tax' demonstration .", + "length": 116 + }, + { + "text": "The plans are aimed at those renting council housing or housing association properties and not those in private properties.", + "length": 123 + }, + { + "text": "Thousands of people have taken to the streets of the UK in protest at cuts to housing benefit for those with spare bedrooms.", + "length": 124 + }, + { + "text": "' Work and Pensions Secretary Iain Duncan Smith was branded a 'ratbag' by a heckler during his visit to Edinburgh last week.", + "length": 124 + }, + { + "text": "Protestors claim the new tax will disproportionately affect disabled people and women, and could lead to evictions and deaths .", + "length": 127 + }, + { + "text": "' The raft of Government welfare cuts that come into effect next week will heap misery on the UK's poorest families, a charity warned today.", + "length": 140 + }, + { + "text": "'There is no doubt that the bedroom tax is an abhorrent policy and the level of outrage surrounding what is being imposed is entirely justified.", + "length": 144 + }, + { + "text": "Around 3,000 people marched on George Square in Glasgow while up to 1,000 people assembled outside the Scottish Parliament in Edinburgh, according to police estimates.", + "length": 167 + }, + { + "text": "The majority of the protests have been organised by Labour Left think tank with others involving the SNP, Greens and SWP together with various Trades Councils and Trade Unions .", + "length": 177 + }, + { + "text": "The protesters insist that the new UK Government policy, dubbed the 'bedroom tax', will disproportionately affect disabled people and women, and could lead to evictions and deaths.", + "length": 180 + }, + { + "text": "Millions of homes will be hit by the combination 
of reforms that comes in from April, with low-paid workers, the unemployed and disabled people bearing the brunt of the cuts, it added.", + "length": 184 + }, + { + "text": "' John McArdle, from anti-disability discrimination campaign Black Triangle, said: 'Every day our campaign receives more messages from desperate people who are on the brink of suicide.", + "length": 184 + }, + { + "text": "Council tax benefit, which covers some or all of the cost of the bill for struggling families, will be replaced by a new system that will be run by English local authorities but on 10 per cent less funding.", + "length": 206 + }, + { + "text": "'The result will be misery - cold rooms, longer queues at food banks, broken families, missed rent payments and yet more people facing homelessness - devastating for those directly affected, but bad for us all.", + "length": 210 + }, + { + "text": "' In a statement of support for the marchers, SNP MSP Linda Fabiani said: 'The Scottish Government has brought in mitigation measures and has made a firm commitment to scrap the policy in an independent Scotland.", + "length": 212 + }, + { + "text": "Crisis said it also had 'serious concerns' about the replacement of disability living allowance (DLA) with a personal independence payment (PIP), claiming the the assessment process for the new benefit will exclude many disabled people who need support.", + "length": 253 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.7103091478347778 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:28.490799294Z", + "first_section_created": "2025-12-23T09:32:28.491151708Z", + "last_section_published": "2025-12-23T09:32:28.491477222Z", + "all_results_received": "2025-12-23T09:32:28.573405214Z", + "output_generated": "2025-12-23T09:32:28.573621023Z", + "total_processing_time_ms": 82, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 81, + "aggregation_wait_time_ms": 0, + "section_journeys": 
[ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:28.491151708Z", + "publish_time": "2025-12-23T09:32:28.491369917Z", + "first_worker_start": "2025-12-23T09:32:28.491964041Z", + "last_worker_end": "2025-12-23T09:32:28.572408Z", + "total_journey_time_ms": 81, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:28.491897738Z", + "start_time": "2025-12-23T09:32:28.491964041Z", + "end_time": "2025-12-23T09:32:28.492046944Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:28.492283Z", + "start_time": "2025-12-23T09:32:28.492429Z", + "end_time": "2025-12-23T09:32:28.572408Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 79 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:28.491997843Z", + "start_time": "2025-12-23T09:32:28.492075546Z", + "end_time": "2025-12-23T09:32:28.49217655Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:28.491993142Z", + "start_time": "2025-12-23T09:32:28.492060845Z", + "end_time": "2025-12-23T09:32:28.492162349Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:28.491421619Z", + "publish_time": "2025-12-23T09:32:28.491477222Z", + "first_worker_start": "2025-12-23T09:32:28.492137348Z", + "last_worker_end": "2025-12-23T09:32:28.555211Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:28.492146848Z", + "start_time": "2025-12-23T09:32:28.49217285Z", + "end_time": "2025-12-23T09:32:28.49218315Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:28.492446Z", + "start_time": 
"2025-12-23T09:32:28.492586Z", + "end_time": "2025-12-23T09:32:28.555211Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:28.492111547Z", + "start_time": "2025-12-23T09:32:28.492137348Z", + "end_time": "2025-12-23T09:32:28.492162149Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:28.492082846Z", + "start_time": "2025-12-23T09:32:28.492137348Z", + "end_time": "2025-12-23T09:32:28.492145048Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 141, + "min_processing_ms": 62, + "max_processing_ms": 79, + "avg_processing_ms": 70, + "median_processing_ms": 79, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2706, + "slowest_section_id": 0, + "slowest_section_time_ms": 81 + } +} diff --git a/data/output/000f05fd5e0b3072c1d905a281aa02f15e8382af.json b/data/output/000f05fd5e0b3072c1d905a281aa02f15e8382af.json new file mode 100644 
index 0000000..854d9ef --- /dev/null +++ b/data/output/000f05fd5e0b3072c1d905a281aa02f15e8382af.json @@ -0,0 +1,372 @@ +{ + "file_name": "000f05fd5e0b3072c1d905a281aa02f15e8382af.txt", + "total_words": 1284, + "top_n_words": [ + { + "word": "the", + "count": 71 + }, + { + "word": "to", + "count": 47 + }, + { + "word": "in", + "count": 32 + }, + { + "word": "a", + "count": 30 + }, + { + "word": "england", + "count": 27 + }, + { + "word": "and", + "count": 25 + }, + { + "word": "of", + "count": 25 + }, + { + "word": "for", + "count": 18 + }, + { + "word": "that", + "count": 17 + }, + { + "word": "his", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "Back to the drawing board?", + "length": 26 + }, + { + "text": "It was desperately needed.", + "length": 26 + }, + { + "text": "10th time in their last 13 one-day internationals.", + "length": 50 + }, + { + "text": "Shane Watson, Australia's number three, plays a shot...", + "length": 55 + }, + { + "text": "he eventually scored 16 runs off 20 balls for the hosts .", + "length": 57 + }, + { + "text": "Pat Cummins (second left) celebrates taking the wicket of Joe Root, who was caught behind .", + "length": 91 + }, + { + "text": "A view of the MA Noble and Don Bradman Stand shows the ground bathed in sunshine on Friday .", + "length": 92 + }, + { + "text": "Morgan gathered his England squad together to give a team-talk before they took to the field .", + "length": 94 + }, + { + "text": "Morgan leads the England players on to the field as a St George's Cross flies in the background .", + "length": 97 + }, + { + "text": "Steve Smith watches on as his shot flies away towards the boundary at the Sydney Cricket Ground .", + "length": 97 + }, + { + "text": "Fans dressed up in the sun to enjoy the One Day International match of Australia against England .", + "length": 98 + }, + { + "text": "At least Morgan, who must have a good World Cup for England to have any chance, is back in business.", + "length": 100 + }, + { 
+ "text": "Eoin Morgan, England's one-day captain, top scored for the visitors with 121 runs against Australia .", + "length": 101 + }, + { + "text": "Ravi Bopara (right) plays a sweep shot on his way to 13 runs off 18 balls for England against Australia .", + "length": 105 + }, + { + "text": "Ian Bell (left) was dismissed off the bowling of Mitchell Starc (centre) in the first ball of the match .", + "length": 105 + }, + { + "text": "Warner and Aaron Finch (right), Australia's opening batsman against England, take to the field on Friday .", + "length": 106 + }, + { + "text": "Jos Buttler (left) appeals unsuccessfully for the wicket of David Warner, who went on to reach a century .", + "length": 106 + }, + { + "text": "Warner bats during the opening match of the One Day International Tri Series between Australia and England .", + "length": 108 + }, + { + "text": "Yes, this is a reality check but it was always expecting a lot for England to hit the ground running against Australia in this series.", + "length": 134 + }, + { + "text": "England will hope Australia prove just as difficult to beat when they face India in Melbourne on Sunday, leaving them to battle it out with MS Dhoni’s side for a place in the final.", + "length": 183 + }, + { + "text": "No-one else had an answer to Starc, the equally rapid Pat Cummins, the left-arm variations of James Faulkner and even the mediocre spinner in Xavier Doherty who England were unable to attack.", + "length": 191 + }, + { + "text": "Yet they will want to do much better than this, starting when they meet that other one-day powerhouse in India in Brisbane on Tuesday and then when they face Australia yet again in Hobart next Friday.", + "length": 200 + }, + { + "text": "The end came with more than 10 overs to spare but only three wickets in hand after Warner, who could have been given out lbw to Moeen Ali on 51, had been dismissed for 127 within sight of the winning post.", + "length": 205 + }, + { + "text": "When Chris Jordan 
hung around with his captain long enough to add 56 for the eighth wicket England at least looked like they would reach the relative riches of 250 and give themselves something to bowl at.", + "length": 205 + }, + { + "text": "Not really, because England were always likely to be a one-day work in progress here with only a puncher’s chance of competing for a tri-series that also includes India and the premier 50-overs tournament.", + "length": 207 + }, + { + "text": "By the time Morgan came to the crease England were in disarray – welcome to the captaincy, Eoin – but at least he was able to steady his ship and hit his first century since this time last year in Brisbane.", + "length": 210 + }, + { + "text": "England's Chris Woakes (third left) celebrates with his team-mates after taking the wicket of Finch (left) Morgan proved his worth as England's one-day captain when he came in at five to top score for the visitors .", + "length": 215 + }, + { + "text": "It was somehow inevitable that Bell would fail here after showing against a Prime Minister’s XI that he can produce the type of dynamic, substantial innings that has been beyond so many Englishmen in 50-over cricket.", + "length": 218 + }, + { + "text": "The much improved Chris Woakes was quicker than Finn and provided encouragement with four wickets but the decision to keep Stuart Broad away from the new balls suggests he too has not fully recovered from knee surgery.", + "length": 218 + }, + { + "text": "Once Ian Bell, who made 187 just two days ago, was trapped lbw by a rapid Mitchell Starc at the Sydney Cricket Ground, much of the optimism England garnered from two near perfect warm-up performances began to evaporate.", + "length": 219 + }, + { + "text": "Thank heavens then for Eoin Morgan who made a welcome return to form and once again appeared galvanised by high office as he single-handedly saved England from one of the total humiliations common place here last winter.", + "length": 220 + }, + { + "text": 
"It took only one ball of the first one-day international against Australia on Friday for England to be handed a rude reminder of the size of the task that awaits them here in the tri-series and the World Cup that follows.", + "length": 221 + }, + { + "text": "England left out Jimmy Anderson to give him more time to recover from his knee problem but in his place Finn was again a shadow of the bowler who looked among the best one-day performers in the world in New Zealand in 2013.", + "length": 223 + }, + { + "text": "Warner’s one-day record pales into insignificance compared to his Test and Twenty20 statistics but he loves being a thorn in England’s side and once he had negotiated the two new balls there was absolutely no shifting him.", + "length": 226 + }, + { + "text": "Where Australia have plenty of bowling options Morgan had no choice other than to rely an attack lacking a left-armer of his own and even their main one-day spinner in James Tredwell, surprisingly left out on a worn SCG pitch.", + "length": 226 + }, + { + "text": "When James Taylor followed two balls later and England then crashed to 33 for four it seemed as though this was simply a continuation of the Ashes tour from hell that reached its nadir on this same famous old ground a year ago.", + "length": 227 + }, + { + "text": "England may have breathed a sigh of relief that they did not have to face Johnson here but instead came another left-arm bowler with the pace, hostility and skill to rip the heart out of their innings before it had barely begun.", + "length": 228 + }, + { + "text": "Once David Warner got in the groove that tormented England all too often during the 5-0 thrashing Australia’s domination was clear against an attack worryingly lacking in variation and incapable of defending a sub-standard 234.", + "length": 229 + }, + { + "text": "Clearly the problems that led to Finn being sent home from the Ashes tour with his action in disarray have not totally been solved yet and Finn did 
well to escape injury when he clattered into the stumps after losing his footing.", + "length": 229 + }, + { + "text": "As it was, Australia cruised to a win so comfortable they were able to gain a bonus point and demonstrated that they remain a formidable one-day machine even without injured captain Michael Clarke and the rested Mitchell Johnson.", + "length": 229 + }, + { + "text": "Yet a century from the new England one-day captain that augers well for the battles ahead was never going to be enough to stop Australia showing that they will be very much the team to beat in the World Cup that begins next month.", + "length": 230 + }, + { + "text": "There was a suspicion that Bell may have got an inside edge onto his pad but the replays were inconclusive and not even the Decision Review System that will not be used in this tri-series at India’s insistence would have saved him.", + "length": 233 + }, + { + "text": "Starc appears to have added the priceless ability to swing the ball back into right-handers to his armoury and here he was simply too quick for both Bell and Taylor as England made the worst possible start to their busiest ever year.", + "length": 233 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5533524751663208 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:28.992465854Z", + "first_section_created": "2025-12-23T09:32:28.992842769Z", + "last_section_published": "2025-12-23T09:32:28.993230084Z", + "all_results_received": "2025-12-23T09:32:29.089980872Z", + "output_generated": "2025-12-23T09:32:29.090255283Z", + "total_processing_time_ms": 97, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 96, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:28.992842769Z", + "publish_time": "2025-12-23T09:32:28.99312398Z", + "first_worker_start": "2025-12-23T09:32:28.993649201Z", + "last_worker_end": 
"2025-12-23T09:32:29.064734Z", + "total_journey_time_ms": 71, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:28.993940313Z", + "start_time": "2025-12-23T09:32:28.994082619Z", + "end_time": "2025-12-23T09:32:28.994194523Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:28.993971Z", + "start_time": "2025-12-23T09:32:28.994153Z", + "end_time": "2025-12-23T09:32:29.064734Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 70 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:28.993785407Z", + "start_time": "2025-12-23T09:32:28.99387401Z", + "end_time": "2025-12-23T09:32:28.994011816Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:28.993574598Z", + "start_time": "2025-12-23T09:32:28.993649201Z", + "end_time": "2025-12-23T09:32:28.993689703Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:28.993161682Z", + "publish_time": "2025-12-23T09:32:28.993230084Z", + "first_worker_start": "2025-12-23T09:32:28.99387171Z", + "last_worker_end": "2025-12-23T09:32:29.089061Z", + "total_journey_time_ms": 95, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:28.993940413Z", + "start_time": "2025-12-23T09:32:28.993992615Z", + "end_time": "2025-12-23T09:32:28.994057518Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:28.994067Z", + "start_time": "2025-12-23T09:32:28.994199Z", + "end_time": "2025-12-23T09:32:29.089061Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 94 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:28.993826108Z", + 
"start_time": "2025-12-23T09:32:28.99387171Z", + "end_time": "2025-12-23T09:32:28.993959614Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:28.993826808Z", + "start_time": "2025-12-23T09:32:28.99387661Z", + "end_time": "2025-12-23T09:32:28.993905812Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 164, + "min_processing_ms": 70, + "max_processing_ms": 94, + "avg_processing_ms": 82, + "median_processing_ms": 94, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3544, + "slowest_section_id": 1, + "slowest_section_time_ms": 95 + } +} diff --git a/data/output/000f9a3513a610ff46580007e9ef7e9dd0bc9fef.json b/data/output/000f9a3513a610ff46580007e9ef7e9dd0bc9fef.json new file mode 100644 index 0000000..f1d7fd5 --- /dev/null +++ b/data/output/000f9a3513a610ff46580007e9ef7e9dd0bc9fef.json @@ -0,0 +1,298 @@ +{ + "file_name": "000f9a3513a610ff46580007e9ef7e9dd0bc9fef.txt", + "total_words": 695, + "top_n_words": [ + { 
+ "word": "a", + "count": 29 + }, + { + "word": "the", + "count": 28 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "that", + "count": 20 + }, + { + "word": "to", + "count": 19 + }, + { + "word": "girls", + "count": 13 + }, + { + "word": "she", + "count": 10 + }, + { + "word": "her", + "count": 9 + }, + { + "word": "on", + "count": 9 + }, + { + "word": "said", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "However the search revealed nothing .", + "length": 37 + }, + { + "text": "She had a silver necklace with a peace symbol on it.", + "length": 52 + }, + { + "text": "Patricia Spencer has brown hair and blue eyes and wears glasses.", + "length": 64 + }, + { + "text": "Pamela's sister, Mary Buehrle, told 48 Hours' Crimesider: 'I can't move on.", + "length": 76 + }, + { + "text": "The tip was never acted upon, Chief David discovered after becoming police chief.", + "length": 81 + }, + { + "text": "Police search teams with cadaver dogs went to the barn but nothing was discovered.", + "length": 82 + }, + { + "text": "Pamela Hobley (pictured left) would now be 60 and Patricia Spencer (right) would be 61.", + "length": 87 + }, + { + "text": "I just hope that someone knows something and will finally have the guts to say something.", + "length": 89 + }, + { + "text": "Oscoda Township Police Department asked that anyone with information call them at 989-739-9113.", + "length": 95 + }, + { + "text": "According to Missing Persons of America, the girls were not usually known to hang out together.", + "length": 95 + }, + { + "text": "Decades after the mysterious disappearance, relatives of the young women refuse to give up hope.", + "length": 96 + }, + { + "text": "They are seen in age-progression photographs created by the National Center For Missing Children .", + "length": 98 + }, + { + "text": "Pamela's mother died without ever finding out what had happened to her daughter, Ms Buehrle 
said.", + "length": 98 + }, + { + "text": "He believes that the investigation got off to a slow start because the girls were deemed runaways.", + "length": 98 + }, + { + "text": "She had brown hair, brown eyes, a scar on her nose and a birthmark on the left corner of her mouth.", + "length": 100 + }, + { + "text": "The girls had told family and friends that they were then heading to a Halloween party - but never showed up.", + "length": 109 + }, + { + "text": "Another tip from a witness said that he had given the girls a ride in his car but that turned out to be false.", + "length": 110 + }, + { + "text": "Two families have renewed their campaign to find two teenage girls who vanished on Halloween 45 years ago today.", + "length": 112 + }, + { + "text": "' Ms Buehrle said she remembered that night as she had gone trick-or-treating with her mother and other sisters.", + "length": 112 + }, + { + "text": "Patricia's sister told the show that she believed the two girls were picked up by the wrong person and were murdered.", + "length": 117 + }, + { + "text": "He said that he wanted to bring the girls home to give their families closure after almost half a century of not knowing.", + "length": 121 + }, + { + "text": "She was wearing a brown sweater, gray and green plaid jacket, brown tween or plaid skirt, and brown shoes with a chunky heel.", + "length": 125 + }, + { + "text": "Chief David has combed over hundreds of old statements and tips involving the case that were not followed up upon at the time.", + "length": 126 + }, + { + "text": "Pamela Hobley's sister, Mary Buehrle, pictured, said that she believes someone murdered her sister and her friend 45 yeras ago .", + "length": 128 + }, + { + "text": "Last year, a new tip emerged from a witness who said that he given the girls a ride and left them at a gas station on River Road.", + "length": 129 + }, + { + "text": "Police dug up a barn in Wilber Township, around 11 miles from Oscoda after receiving a tip that the 
girls were buried beneath it.", + "length": 130 + }, + { + "text": "Oscoda Township Police Chief Mark David, who took over the case in 2010, also believes that the girls may have been victims of foul play.", + "length": 137 + }, + { + "text": "The girls, who did not usually hang out together, relatives said, left the school early on Halloween to go to a party but never arrived .", + "length": 137 + }, + { + "text": "Oscoda High School was subject to a bomb threat that day and family members believed that the girls may have left school early that afternoon.", + "length": 142 + }, + { + "text": "The chief said that when he was a teenager it was rumored that the girls had been buried under a barn in Wilber Township, around 11 miles from Oscoda.", + "length": 150 + }, + { + "text": "Pamela Hobley, 15, (left) and Patricia Spencer, 17, (right) went missing on October 31, 1969 after going to a high school football game in Oscoda, Michigan .", + "length": 157 + }, + { + "text": "Patricia Spencer, 17, and her 15-year-old friend Pamela Hobley went missing on October 31, 1969 after going to a high school football game in Oscoda, Michigan.", + "length": 159 + }, + { + "text": "She said her mother, a single mom, was told by Pamela that she was going to a Halloween party with her friends and boyfriend, to whom she had recently become engaged.", + "length": 166 + }, + { + "text": "When they were last seen, Miss Hobley was wearing a white fake fur coat wit brown fur trim, a long-sleeved blouse with ruffled cuffs and a brown and white plaid skirt, white kneesocks and shoes with a chunky heel.", + "length": 213 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5829138159751892 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:29.494008708Z", + "first_section_created": "2025-12-23T09:32:29.494291819Z", + "last_section_published": "2025-12-23T09:32:29.494606332Z", + "all_results_received": "2025-12-23T09:32:29.555669986Z", + 
"output_generated": "2025-12-23T09:32:29.555820692Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:29.494291819Z", + "publish_time": "2025-12-23T09:32:29.494606332Z", + "first_worker_start": "2025-12-23T09:32:29.495128753Z", + "last_worker_end": "2025-12-23T09:32:29.55476Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:29.495074951Z", + "start_time": "2025-12-23T09:32:29.495148254Z", + "end_time": "2025-12-23T09:32:29.495268059Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:29.495316Z", + "start_time": "2025-12-23T09:32:29.495451Z", + "end_time": "2025-12-23T09:32:29.55476Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:29.495121953Z", + "start_time": "2025-12-23T09:32:29.495188355Z", + "end_time": "2025-12-23T09:32:29.495275159Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:29.49504825Z", + "start_time": "2025-12-23T09:32:29.495128753Z", + "end_time": "2025-12-23T09:32:29.495169555Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + 
"median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3885, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/000fa431cc8c28e29752db172774b9de1dcd1129.json b/data/output/000fa431cc8c28e29752db172774b9de1dcd1129.json new file mode 100644 index 0000000..cde3783 --- /dev/null +++ b/data/output/000fa431cc8c28e29752db172774b9de1dcd1129.json @@ -0,0 +1,520 @@ +{ + "file_name": "000fa431cc8c28e29752db172774b9de1dcd1129.txt", + "total_words": 998, + "top_n_words": [ + { + "word": "the", + "count": 66 + }, + { + "word": "in", + "count": 37 + }, + { + "word": "s", + "count": 32 + }, + { + "word": "of", + "count": 31 + }, + { + "word": "a", + "count": 22 + }, + { + "word": "was", + "count": 21 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "u", + "count": 20 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "laude", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + 
"text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "protests.", + "length": 9 + }, + { + "text": "Accused: U.", + "length": 11 + }, + { + "text": "Marine Pfc.", + "length": 11 + }, + { + "text": "Philippine and U.", + "length": 17 + }, + { + "text": "Pacific Commander Adm.", + "length": 22 + }, + { + "text": "Smith was held at the U.", + "length": 24 + }, + { + "text": "Earlier this week,  the U.", + "length": 27 + }, + { + "text": "Marine accused of her murder.", + "length": 29 + }, + { + "text": "military turned over Marine Pfc.", + "length": 32 + }, + { + "text": "Gregorio Pio Catapang told a news conference.", + "length": 45 + }, + { + "text": "soldier accused of killing transgender Jennifer .", + "length": 49 + }, + { + "text": "Under the Visiting Forces Agreement, which allows U.", + "length": 52 + }, + { + "text": "Left-wing groups have staged small protests at the U.", + "length": 53 + }, + { + "text": "'They agreed to put him in a facility which will pass U.", + "length": 56 + }, + { + "text": "and the Philippines, Philippine military chief of staff Gen.", + "length": 60 + }, + { + "text": "military personnel must serve any sentence in Philippine detention.", + "length": 67 + }, + { + "text": "Joseph Scott Pemberton (center) is transported to Filipino custody .", + "length": 68 + }, + { + "text": "authorities engaged in a high-profile custody battle over another U.", + "length": 68 + }, + { + "text": "The Philippine Supreme Court, however, ruled in 2009 that convicted U.", + "length": 70 + }, + { + "text": "custodial standards,' Philippine Defense Secretary Voltaire Gazmin 
said.", + "length": 72 + }, + { + "text": "custody provision as proof that the accord was lopsided in favor of the U.", + "length": 74 + }, + { + "text": "Pemberton's transfer by helicopter to Manila early Wednesday was agreed by the U.", + "length": 81 + }, + { + "text": "Killed: Transgender Jennifer Laude, 26, was found dead in a hotel in Olongapo City .", + "length": 85 + }, + { + "text": "Marines while Philippine military guards will be posted outside the compound, he said.", + "length": 86 + }, + { + "text": "and undermines the sovereignty of the country, which was an American colony until 1946.", + "length": 87 + }, + { + "text": "He said that Pemberton will be detained in an air-conditioned van, directly guarded by U.", + "length": 89 + }, + { + "text": "'We're happy with this because he's a suspect in a crime that was committed in our country.", + "length": 91 + }, + { + "text": "marine Joseph Scott Pemberton, 19, from Massachusetts  is accused of strangling Laude, 26 .", + "length": 92 + }, + { + "text": "However, Jennifer Laude's family have expressed their anger at Pemberton being guarded by U.", + "length": 92 + }, + { + "text": "Action: Susselbeck pushed past one of the Filipino guards in an attempt to get access to the base .", + "length": 99 + }, + { + "text": "She had apparently been drowned in the toilet bowl, according to police Chief Inspector Gil Domingo.", + "length": 100 + }, + { + "text": "Joseph Scott Pemberton, 19, from New Bedford, Massachusetts, to the Philippine military's main camp.", + "length": 100 + }, + { + "text": "has custody over them 'from the commission of the offense until completion of all judicial proceedings.", + "length": 103 + }, + { + "text": "Jennifer Laude's German boyfriend Marc Susselbeck also jumped over the fence, and was seen pushing an army guard.", + "length": 114 + }, + { + "text": "forces to conduct combat drills in the country, the Philippines can prosecute American service members, but the U.", + "length": 
114 + }, + { + "text": "Anger: German Marc Susselbeck, Laude's boyfriend, follows over the gates as the family demands to see the teenage U.", + "length": 116 + }, + { + "text": "Eyewitnesses say Pemberton met her in a nightclub and didn't know she was transgender until they were in a hotel room.", + "length": 118 + }, + { + "text": "The boyfriend and sister of Jennifer Laude, 26, climbed into the military base in Manila, demanding to see the teenage U.", + "length": 121 + }, + { + "text": "forces at the base and yesterday gathered outside the base, demanding proof that the teenage marine was really kept there.", + "length": 122 + }, + { + "text": "Laude, who was born Jeffrey, was found dead in a hotel in Olongapo City, around two hours drive from Manila, on October 11.", + "length": 123 + }, + { + "text": "Embassy in Manila until a Philippine appeals court overturned his conviction in 2009, allowing him to leave the country amid anti-U.", + "length": 132 + }, + { + "text": "Mourning: Relatives of Jennifer Laude, including her sister and her German boyfriend, walk during her funeral march in Olongapo City .", + "length": 135 + }, + { + "text": "Secretary of State John Kerry said Monday that Washington seeks no special privilege for the suspect but only protection of his rights.", + "length": 135 + }, + { + "text": "Marilou Laude, the sister of Jennifer, who was born Jefferey, climbed over the fence and into the base and was swiftly captured by guards.", + "length": 138 + }, + { + "text": "Pemberton was detained on board the USS Peleliu at the Subic Bay Freeport, about 50 miles northwest of Manila, after he was implicated in the death.", + "length": 148 + }, + { + "text": "Protest: Students gather for a rally at the University of the Philippines campus at suburban Quezon city, northeast of Manila, to demand justice for Jennifer .", + "length": 159 + }, + { + "text": "' Tragedy: Jennifer's mother Julita Laude cries in front of the coffin of her daughter during her 
funeral in Olongapo City, Zambales province north of Manila .", + "length": 159 + }, + { + "text": "Laude, who was said to have been unemployed and living off a monthly allowance from Susselbeck, was later found strangled to death there, her head in a toilet bowl.", + "length": 164 + }, + { + "text": "President Benigno Aquino III on Monday defended the agreement, saying a crime can happen anywhere and 'the sin of one person' should not reflect on the entire country.", + "length": 167 + }, + { + "text": "Joseph Scott Pemberton is escorted to his detention as he arrives inside the compound of the Camp Aguinaldo military headquarters in suburban Quezon city, north of Manila .", + "length": 172 + }, + { + "text": "Marine, Daniel Smith, who was found guilty and sentenced to life in prison on charges of raping a Filipino woman after a night of drinking in 2005, also at the Subic freeport.", + "length": 175 + }, + { + "text": "In the latest case, Philippine police and witnesses said Pemberton and Laude met at a bar in the city of Olongapo on October 11, then went to a hotelroom where Laude's body was later found in the bathroom.", + "length": 205 + }, + { + "text": "Dozens of angry Filipinos gathered outside a military base where a young American soldier is being held accused of murdering a transgender woman, in a protest which ended in the victim's family scaling the fence .", + "length": 213 + }, + { + "text": "Samuel Locklear has ordered Pemberton's amphibious assault ship, which joined large-scale combat exercises with Filipino forces that ended October 10, to stay in the Philippines until the investigation was completed.", + "length": 216 + }, + { + "text": "Protest: Marilou Laude, the sister of murdered Filipina Jennifer Laude, climbs the gate of the facility where PfC Joseph Scott Pemberton is detained at the Armed Forces of the Philippine (AFP) headquarters in Manila .", + "length": 217 + }, + { + "text": "Embassy in Manila and at the Subic wharf where the USS 
Peleliu is moored, demanding Washington to turn over Pemberton to Philippine authorities for prosecution and detention and calling for the 1998 agreement's abrogation.", + "length": 222 + }, + { + "text": "' Foreign Secretary Albert del Rosario, who has described Laude's death as tragic, said that the arrangement showed 'mechanisms are in place so that justice can be served,' adding there was strong cooperation between the treaty allies.", + "length": 235 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7717909216880798 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:29.995397156Z", + "first_section_created": "2025-12-23T09:32:29.995806973Z", + "last_section_published": "2025-12-23T09:32:29.996182388Z", + "all_results_received": "2025-12-23T09:32:30.094593642Z", + "output_generated": "2025-12-23T09:32:30.094816851Z", + "total_processing_time_ms": 99, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 98, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:29.995806973Z", + "publish_time": "2025-12-23T09:32:29.996086784Z", + "first_worker_start": "2025-12-23T09:32:29.996587504Z", + "last_worker_end": "2025-12-23T09:32:30.093709Z", + "total_journey_time_ms": 97, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:29.996759211Z", + "start_time": "2025-12-23T09:32:29.996833914Z", + "end_time": "2025-12-23T09:32:29.996936518Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:29.997101Z", + "start_time": "2025-12-23T09:32:29.99726Z", + "end_time": "2025-12-23T09:32:30.093709Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 96 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:29.996716209Z", + "start_time": "2025-12-23T09:32:29.996773412Z", + "end_time": 
"2025-12-23T09:32:29.996871816Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:29.996520201Z", + "start_time": "2025-12-23T09:32:29.996587504Z", + "end_time": "2025-12-23T09:32:29.996650407Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:29.996133386Z", + "publish_time": "2025-12-23T09:32:29.996182388Z", + "first_worker_start": "2025-12-23T09:32:29.996763611Z", + "last_worker_end": "2025-12-23T09:32:30.070542Z", + "total_journey_time_ms": 74, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:29.997001321Z", + "start_time": "2025-12-23T09:32:29.997025222Z", + "end_time": "2025-12-23T09:32:29.997046923Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:29.997267Z", + "start_time": "2025-12-23T09:32:29.997399Z", + "end_time": "2025-12-23T09:32:30.070542Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 73 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:29.996716009Z", + "start_time": "2025-12-23T09:32:29.996763611Z", + "end_time": "2025-12-23T09:32:29.996799813Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:29.996810113Z", + "start_time": "2025-12-23T09:32:29.996834514Z", + "end_time": "2025-12-23T09:32:29.996849515Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": 
"sentiment", + "total_sections": 2, + "total_processing_ms": 169, + "min_processing_ms": 73, + "max_processing_ms": 96, + "avg_processing_ms": 84, + "median_processing_ms": 96, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3002, + "slowest_section_id": 0, + "slowest_section_time_ms": 97 + } +} diff --git a/data/output/000fdddcbfce58b56605f99e81c22c9b86ea4773.json b/data/output/000fdddcbfce58b56605f99e81c22c9b86ea4773.json new file mode 100644 index 0000000..7a2b317 --- /dev/null +++ b/data/output/000fdddcbfce58b56605f99e81c22c9b86ea4773.json @@ -0,0 +1,238 @@ +{ + "file_name": "000fdddcbfce58b56605f99e81c22c9b86ea4773.txt", + "total_words": 315, + "top_n_words": [ + { + "word": "the", + "count": 21 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "in", + "count": 7 + }, + { + "word": "inmates", + "count": 7 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "to", + "count": 6 + }, + { + "word": "were", + "count": 6 + }, + { + "word": "county", + "count": 5 + }, + { + "word": "an", + "count": 4 + }, + { + "word": "are", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "m.", + "length": 2 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Castro .", + "length": 8 + }, + { + "text": "Jill Reilly .", + "length": 13 + }, + { + "text": "Tuesday and Wednesday.", + "length": 22 + }, + { + "text": ", county spokeswoman Kathleen Castro.", + "length": 37 + }, + { + 
"text": "Sorry we are not currently accepting comments on this article.", + "length": 62 + }, + { + "text": "get extensive flooding during heavy rains that drenched the region .", + "length": 68 + }, + { + "text": "didn't know if flooding in the area was a factor but says the jail did .", + "length": 72 + }, + { + "text": "Escambia County is located in the northwestern part of Florida, in the southern United States.", + "length": 94 + }, + { + "text": "Roughly 100 -150 inmates and correctional officers were hurt she said, as she confirmed two people had died.", + "length": 108 + }, + { + "text": "The explosion shook houses for several blocks and the areas has been quarantined by Sheriff's Office personnel.", + "length": 111 + }, + { + "text": "The injured were brought to hospitals and the uninjured inmates were brought to jails in neighboring counties, Castro said.", + "length": 123 + }, + { + "text": "Two inmates have died and more than a hundred have been injured after an apparent gas explosion at a jail in northern Florida.", + "length": 126 + }, + { + "text": "She said there was a partial building collapse, a roof collapse and that it is believed that the walls are unstable in the facility.", + "length": 132 + }, + { + "text": "The blast partly leveled the four-story Escambia County Jail's central booking facility, which held roughly 600 inmates, at about 11 p.", + "length": 135 + }, + { + "text": "Kendrick Doidge said West Florida Hospital treated 37 inmates in the emergency room and all have been released back into the custody of the Escambia County Sheriff's Office.", + "length": 173 + }, + { + "text": "An updated statement on the Escambia County website later made no mention of gas, saying only it was an 'apparent explosion' after reporting earlier it was 'an apparent gas explosion'.", + "length": 184 + }, + { + "text": "Castro described a frenetic scene where officials were scrambling to get people out of the building, provide medical care, and working to 
make sure inmates were detained and routed to other facilities.", + "length": 201 + }, + { + "text": "Sacred Heart Hospital spokesman Mike Burke told Pensacola News Journal that 31 people were brought to the hospital by ambulances and law enforcement officers are at the hospital providing security as the inmates are being treated.", + "length": 230 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8359448313713074 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:30.496949611Z", + "first_section_created": "2025-12-23T09:32:30.497244323Z", + "last_section_published": "2025-12-23T09:32:30.497451531Z", + "all_results_received": "2025-12-23T09:32:30.564039107Z", + "output_generated": "2025-12-23T09:32:30.564517426Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:30.497244323Z", + "publish_time": "2025-12-23T09:32:30.497451531Z", + "first_worker_start": "2025-12-23T09:32:30.498082657Z", + "last_worker_end": "2025-12-23T09:32:30.563127Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:30.498086957Z", + "start_time": "2025-12-23T09:32:30.498151459Z", + "end_time": "2025-12-23T09:32:30.498196261Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:30.498154Z", + "start_time": "2025-12-23T09:32:30.498292Z", + "end_time": "2025-12-23T09:32:30.563127Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:30.498022054Z", + "start_time": "2025-12-23T09:32:30.498082657Z", + "end_time": "2025-12-23T09:32:30.498125758Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": 
{ + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:30.498047455Z", + "start_time": "2025-12-23T09:32:30.498109458Z", + "end_time": "2025-12-23T09:32:30.499687021Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 1 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 1, + "min_processing_ms": 1, + "max_processing_ms": 1, + "avg_processing_ms": 1, + "median_processing_ms": 1, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1922, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/001065de2a3f143967f10a5976e1f2722e5629f5.json b/data/output/001065de2a3f143967f10a5976e1f2722e5629f5.json new file mode 100644 index 0000000..6370e18 --- /dev/null +++ b/data/output/001065de2a3f143967f10a5976e1f2722e5629f5.json @@ -0,0 +1,466 @@ +{ + "file_name": "001065de2a3f143967f10a5976e1f2722e5629f5.txt", + "total_words": 702, + "top_n_words": [ + { + "word": "the", + "count": 28 + }, + { + "word": "to", + "count": 24 + }, + { + "word": "he", + "count": 22 + }, + { + "word": "and", + "count": 21 + }, + { + "word": 
"was", + "count": 19 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "his", + "count": 17 + }, + { + "word": "cowell", + "count": 15 + }, + { + "word": "s", + "count": 13 + }, + { + "word": "in", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "A .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "He .", + "length": 4 + }, + { + "text": "Sir .", + "length": 5 + }, + { + "text": "45pm.", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "Baby .", + "length": 6 + }, + { + "text": "father.", + "length": 7 + }, + { + "text": "spoiled.", + "length": 8 + }, + { + "text": "adored...", + "length": 9 + }, + { + "text": "the baby’.", + "length": 12 + }, + { + "text": "lovely fella.", + "length": 13 + }, + { + "text": "several days.", + "length": 13 + }, + { + "text": "Simon’s a .", + "length": 13 + }, + { + "text": "assumed name.", + "length": 13 + }, + { + "text": "His spokesman .", + "length": 15 + }, + { + "text": "’ And Piers .", + "length": 15 + }, + { + "text": "Caroline Graham .", + "length": 17 + }, + { + "text": "Simon will give .", + "length": 17 + }, + { + "text": "’ Silverman’s .", + "length": 19 + }, + { + "text": "Named after my dad.", + "length": 19 + }, + { + "text": "Healthy and handsome.", + "length": 21 + }, + { + "text": "Lauren in great shape.", + "length": 22 + }, + { + "text": "London and Los Angeles.", + "length": 23 + }, + { + "text": "45pm New York time (10.", + "length": 23 + }, + { + "text": "The suite is spectacular.", + "length": 25 + }, + { + "text": "It has two bedrooms, two .", + "length": 26 + }, + { + "text": "He hasn’t stopped smiling.", + "length": 28 + }, + { + "text": "He’s besotted by the baby.", + "length": 28 + }, + { + "text": "bathrooms and a sitting room.", + "length": 29 + }, + { + "text": "daughter Blue Ivy two years ago.", + "length": 32 + }, + { + "text": "Thanks for all of your kind messages.", + "length": 37 + }, + { + "text": "The first call he made 
was to his mum.", + "length": 38 + }, + { + "text": "lovely tribute to name his son after him.", + "length": 41 + }, + { + "text": "She is reportedly 'thrilled' with the news.", + "length": 43 + }, + { + "text": "He is hoping he will have an English accent.", + "length": 44 + }, + { + "text": "Cowell wants his son to have an English nanny.", + "length": 46 + }, + { + "text": "’’ He said it was the best day of his life.", + "length": 47 + }, + { + "text": "45am call from Lauren saying she was in labour.", + "length": 47 + }, + { + "text": "‘It was such an incredible and emotional day.", + "length": 47 + }, + { + "text": "He has previously pledged ‘not to smoke around .", + "length": 50 + }, + { + "text": "’ He made a transatlantic dash after receiving a 5.", + "length": 53 + }, + { + "text": "Silverman, saying: ‘It’s too soon to discuss that.", + "length": 54 + }, + { + "text": "source added: ‘Eric will be brought up traditionally.", + "length": 55 + }, + { + "text": "‘He told him, ‘‘I’ve waited all my life for you.", + "length": 56 + }, + { + "text": "A source said: ‘He is on a pre-planned vacation with his .", + "length": 60 + }, + { + "text": "‘Simon was by his girlfriend Lauren’s side the entire time.", + "length": 63 + }, + { + "text": "saying: ‘A young Simon Cowell with  his father Eric, who he .", + "length": 64 + }, + { + "text": "former close friend Andrew, will not see his new half-brother for .", + "length": 67 + }, + { + "text": "‘You can have  a family and still enjoy being  in showbusiness.", + "length": 67 + }, + { + "text": "Lenox Hill Hospital that singer Beyoncé used for the birth of her .", + "length": 68 + }, + { + "text": "The source added: ‘The baby was handed to Lauren and then to Simon.", + "length": 69 + }, + { + "text": "Green and hypnotist Paul  McKenna are also expected to be godparents.", + "length": 70 + }, + { + "text": "adding that he was planning to install smoking rooms in his homes in .", + "length": 70 + }, + { + "text": "45pm 
GMT) on Friday, 17 hours after Lauren Silverman went into labour.", + "length": 70 + }, + { + "text": "him the best education his money can buy and doesn’t want him to be .", + "length": 71 + }, + { + "text": "had vowed to give up his 20-a-day habit but later joked he was lying, .", + "length": 71 + }, + { + "text": "hospital insider said:  ‘Lauren checked into the hospital under an .", + "length": 71 + }, + { + "text": "Tom Jones spoke before  a charity performance in Los Angeles to say: .", + "length": 71 + }, + { + "text": "Morgan took to Twitter to share a photograph of Cowell and his father, .", + "length": 72 + }, + { + "text": "eight-year-old son Adam, whose father is her ex-husband and Cowell’s .", + "length": 72 + }, + { + "text": "last night refused to comment on reports that he planned to propose to .", + "length": 72 + }, + { + "text": "Eric was born in the same £2,000-a-night luxury suite in Manhattan’s .", + "length": 73 + }, + { + "text": "’ The birth was greeted with an outpouring of support from Cowell’s friends.", + "length": 80 + }, + { + "text": "(Right) Simon Cowell and Lauren Silveman, now proud parents, on a date in London .", + "length": 82 + }, + { + "text": "‘He was in tears as he watched the birth and broke down when Eric was handed to him.", + "length": 86 + }, + { + "text": "Cowell announced the news on Twitter, saying: ‘Very happy to say Eric was born at 17.", + "length": 87 + }, + { + "text": "‘When he finally left the hospital he went and celebrated with a Corona and a burger.", + "length": 87 + }, + { + "text": "’ Cowell chose ex-girlfriend and singer Sinitta (left) to be the godmother to his new son.", + "length": 92 + }, + { + "text": "Simon Cowell wept when he was handed his newborn son on Valentine’s Day, it was revealed last night.", + "length": 102 + }, + { + "text": "New dad: Simon Cowell arrives at Lenox Hill Hospital in New York this afternoon, posing for pictures .", + "length": 102 + }, + { + "text": "’ Smoker Cowell 
puffed away outside the hospital immediately before – and after – Eric’s birth.", + "length": 103 + }, + { + "text": "’ Sinitta, who has known Cowell since the 1980s, said she was ‘thrilled and honoured’ to have been chosen as godmother.", + "length": 125 + }, + { + "text": "’ Eric Philip, named after Cowell’s late father and his best friend, Topshop billionaire Sir Philip Green, was born at 5.", + "length": 125 + }, + { + "text": "Cowell tweeted the news mere hours after his son was born, confirming the child's name and the fact that mum and baby were doing fine .", + "length": 135 + }, + { + "text": "The X Factor supremo is said to be ‘delirious with joy’ after the birth of 6lb 7oz Eric Philip, who he greeted with the words: ‘I’ve waited all my life for you.", + "length": 168 + }, + { + "text": "’ A close friend of 54-year-old Cowell, who has chosen ex- girlfriend Sinitta to be the child’s godmother, said: ‘Simon was much more emotional than  he expected.", + "length": 169 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.43413418531417847 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:30.998163152Z", + "first_section_created": "2025-12-23T09:32:30.998422063Z", + "last_section_published": "2025-12-23T09:32:30.998702574Z", + "all_results_received": "2025-12-23T09:32:31.072883855Z", + "output_generated": "2025-12-23T09:32:31.073157966Z", + "total_processing_time_ms": 74, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 74, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:30.998422063Z", + "publish_time": "2025-12-23T09:32:30.998702574Z", + "first_worker_start": "2025-12-23T09:32:30.999197694Z", + "last_worker_end": "2025-12-23T09:32:31.072051Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:30.999253896Z", + "start_time": 
"2025-12-23T09:32:30.9993614Z", + "end_time": "2025-12-23T09:32:30.999447204Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:30.999658Z", + "start_time": "2025-12-23T09:32:30.999853Z", + "end_time": "2025-12-23T09:32:31.072051Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:30.999231895Z", + "start_time": "2025-12-23T09:32:30.999306898Z", + "end_time": "2025-12-23T09:32:30.999416703Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:30.999128491Z", + "start_time": "2025-12-23T09:32:30.999197694Z", + "end_time": "2025-12-23T09:32:30.999241596Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 72, + "min_processing_ms": 72, + "max_processing_ms": 72, + "avg_processing_ms": 72, + "median_processing_ms": 72, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + 
"average_section_size": 3985, + "slowest_section_id": 0, + "slowest_section_time_ms": 73 + } +} diff --git a/data/output/001097a19e2c96de11276b3cce11566ccfed0030.json b/data/output/001097a19e2c96de11276b3cce11566ccfed0030.json new file mode 100644 index 0000000..08a2b5f --- /dev/null +++ b/data/output/001097a19e2c96de11276b3cce11566ccfed0030.json @@ -0,0 +1,468 @@ +{ + "file_name": "001097a19e2c96de11276b3cce11566ccfed0030.txt", + "total_words": 1132, + "top_n_words": [ + { + "word": "the", + "count": 57 + }, + { + "word": "a", + "count": 33 + }, + { + "word": "to", + "count": 31 + }, + { + "word": "of", + "count": 24 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "in", + "count": 19 + }, + { + "word": "s", + "count": 18 + }, + { + "word": "ces", + "count": 16 + }, + { + "word": "at", + "count": 15 + }, + { + "word": "that", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "\" Apps .", + "length": 8 + }, + { + "text": "Tablets .", + "length": 9 + }, + { + "text": ") Ultrabooks .", + "length": 14 + }, + { + "text": "Connected cars .", + "length": 16 + }, + { + "text": "Sorry, Microsoft.", + "length": 17 + }, + { + "text": "Or to show up at all.", + "length": 21 + }, + { + "text": "tech to be their guide.", + "length": 23 + }, + { + "text": "Internet TVs that also do 3-D .", + "length": 31 + }, + { + "text": "\"It's the computer on four wheels.", + "length": 34 + }, + { + "text": "\" Netbooks appear to be on their way out.", + "length": 41 + }, + { + "text": "Windows won't be just for tablets, of course.", + "length": 45 + }, + { + "text": "\"It's no longer an optional part of the business.", + "length": 49 + }, + { + "text": "\" Software makers are eager to exploit this reality.", + "length": 52 + }, + { + "text": "Several car companies have lined up CES presentations.", + "length": 54 + }, + { + "text": "\"It's the only trade show I attend all year,\" she said.", + "length": 55 + }, + { + "text": "Analysts expect to see a bevy of Windows 8 
tablets at CES.", + "length": 58 + }, + { + "text": "' \" a Microsoft spokesman asked rhetorically in a statement.", + "length": 60 + }, + { + "text": "Don't expect Apple to show up at CES with a big screen though.", + "length": 62 + }, + { + "text": "Netflix and Pandora have seen tremendous growth on TV platforms.", + "length": 64 + }, + { + "text": "(The Fire actually has quite a bit of Android code under the hood.", + "length": 66 + }, + { + "text": "Decades since its formation, the yearly six-day event is a spectacle.", + "length": 69 + }, + { + "text": "The organizers expect to welcome at least that many people next week.", + "length": 69 + }, + { + "text": "\"These companies see the car as a software platform,\" Westergren said.", + "length": 70 + }, + { + "text": "Google and Samsung last month released the first phone running Android 4.", + "length": 73 + }, + { + "text": "\"And then you've got Microsoft pushing the touchscreen tablet experience.", + "length": 73 + }, + { + "text": "Meanwhile, Internet services are also working their way onto television sets.", + "length": 77 + }, + { + "text": "Many say they will tout their wares at partners' booths and in private meetings.", + "length": 80 + }, + { + "text": "Microsoft will enable this anti-disc computer with the application store in Windows 8.", + "length": 86 + }, + { + "text": "Dieter Zetsche, the head of Mercedes-Benz Cars, will present a keynote speech Tuesday.", + "length": 86 + }, + { + "text": "Google has reportedly invested more in its TV platform, which should be evident at CES.", + "length": 87 + }, + { + "text": "A new breed of computers called Ultrabooks will launch at CES from several PC manufacturers.", + "length": 92 + }, + { + "text": "MOG, the music-streaming service, plans to announce integration with a line of cars Tuesday.", + "length": 92 + }, + { + "text": "TVs have long been the centerpiece of CES and of the consumer electronics industry as a whole.", + "length": 94 + }, + { + 
"text": "Sure, CES is about gadgets, but the programs that run on them have become a key selling point.", + "length": 94 + }, + { + "text": "But the Ultrabook initiative is being driven by Intel, which makes the processor that runs them.", + "length": 96 + }, + { + "text": "The compact disc player, high-definition television and Blu-ray each debuted at past conventions.", + "length": 97 + }, + { + "text": "0, which is Google's first system that's designed to work consistently on either a phone or a tablet.", + "length": 101 + }, + { + "text": "\"Are we doing something because it's the right thing to do, or because 'it's the way we've always done it?", + "length": 106 + }, + { + "text": "Not to get left out of the party it started about a decade ago, Microsoft is stepping up its tablet efforts.", + "length": 108 + }, + { + "text": "If the tablet wars are a response to the iPad, then Ultrabooks follow in the footsteps of Apple's MacBook Air.", + "length": 110 + }, + { + "text": "That's a bad sign for Google, whose Chromebooks have struggled to challenge Microsoft in PC operating systems.", + "length": 110 + }, + { + "text": "Electronics makers use CES as a platform to show the types of products they plan to release in the coming year.", + "length": 111 + }, + { + "text": "\"Software has simply become so critical to the overall customer experience,\" said Rubin, the NPD Group analyst.", + "length": 111 + }, + { + "text": "For the last few years, the big push has been in three-dimensional viewing technology, but demand has been small.", + "length": 113 + }, + { + "text": "For example, online video provider Vimeo plans to make a major announcement Monday to coincide with the start of CES.", + "length": 117 + }, + { + "text": "CES is expected to provide a launchpad for TVs that are smarter about how they let watchers access Web content, analysts say.", + "length": 125 + }, + { + "text": "They are expected to be thinner, lighter and more refined versions of gadgets that 
have already gained a toehold with consumers.", + "length": 128 + }, + { + "text": "But with some of the largest players in today's consumer electronics industry shunning CES, the trade show's impact may be waning.", + "length": 130 + }, + { + "text": "Electronics makers have been chasing after Apple's iPad for two years, and the racetrack is expected to get more crowded next week.", + "length": 131 + }, + { + "text": "With so many options, bargain hunters may get to pick something besides Amazon's Kindle Fire, which lit up holiday sales last month.", + "length": 132 + }, + { + "text": "Apple, the world's most valuable technology company, and Amazon, an upstart in tablets and the leader in e-readers, do not participate.", + "length": 135 + }, + { + "text": "At CES, tech companies will showcase plenty of phones with that software, but the touchscreen tablets with Android 4 will be prevalent.", + "length": 135 + }, + { + "text": "The big product categories that will dominate the CES show floor next week, according to manufacturers and analysts, are not revolutionary.", + "length": 139 + }, + { + "text": "The electronics giants do not want to get beaten to another big opportunity by Apple, which is rumored to be working on a TV set of its own.", + "length": 140 + }, + { + "text": "Google's operating systems can be found in partners' booths, running on phones, tablets and TVs, but the software giant does not run a booth.", + "length": 141 + }, + { + "text": "Last year, 140,000 people in the technology industry convened at the Las Vegas Convention Center to mingle and gawk at cutting-edge hardware.", + "length": 141 + }, + { + "text": "(CNN) -- Like a stereotypical beauty pageant, it looks like thin will be in at the world's largest annual gadget convention next week in Las Vegas.", + "length": 147 + }, + { + "text": "Many electronics makers have apparently decided that each new phone, tablet, car or refrigerator should allow owners to update their Facebook statuses.", 
+ "length": 151 + }, + { + "text": "Vimeo CEO Dae Mellencamp said in an interview at the company's New York headquarters that CES has emerged as an important venue for her company and others like it.", + "length": 163 + }, + { + "text": "The next major version of Windows will have a revamped interface for tablet computers, which will present programs as tiles that can be touched to fill the screen.", + "length": 163 + }, + { + "text": "And Microsoft, which will deliver its 14th CES presentation Monday, announced recently it will not have a booth or participate in the keynotes after this year's event.", + "length": 167 + }, + { + "text": "And since last CES, Pandora has more than doubled the number of cars that tap into its streaming radio service, Pandora founder Tim Westergren said in a phone interview.", + "length": 169 + }, + { + "text": "They are expected to announce partnerships with popular Internet software companies and unveil technical wonders available at the touch of a button on the steering wheel.", + "length": 170 + }, + { + "text": "At the 2012 International Consumer Electronics Show, computer makers will be pushing a new breed of ultra-thin, ultra-light laptops amid a sea of razor-thin smartphones and tablets.", + "length": 181 + }, + { + "text": "They are thinner and lighter than the average laptop because they typically do not have disc drives, and instead of hard drives, use flash memory, which is faster but more expensive.", + "length": 182 + }, + { + "text": "For the 3-D optimists, app-friendly TVs, which also happen to work with 3-D glasses, could allow for more 3-D video from independent filmmakers who distribute over the Web, Rubin noted.", + "length": 185 + }, + { + "text": "\"You have Intel pushing heavily on this very thin but relatively traditional clamshell form factor without a lot of emphasis on touch,\" NPD Group analyst Ross Rubin said in a phone interview.", + "length": 191 + } + ], + "sentiment": { + "sentiment": "neutral", + 
"score": 0.4929571747779846 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:31.499521199Z", + "first_section_created": "2025-12-23T09:32:31.499911315Z", + "last_section_published": "2025-12-23T09:32:31.500371333Z", + "all_results_received": "2025-12-23T09:32:31.590892671Z", + "output_generated": "2025-12-23T09:32:31.591103579Z", + "total_processing_time_ms": 91, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 90, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:31.499911315Z", + "publish_time": "2025-12-23T09:32:31.500166625Z", + "first_worker_start": "2025-12-23T09:32:31.50078315Z", + "last_worker_end": "2025-12-23T09:32:31.590005Z", + "total_journey_time_ms": 90, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:31.50079135Z", + "start_time": "2025-12-23T09:32:31.500878854Z", + "end_time": "2025-12-23T09:32:31.501012359Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:31.501Z", + "start_time": "2025-12-23T09:32:31.501146Z", + "end_time": "2025-12-23T09:32:31.590005Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 88 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:31.500705347Z", + "start_time": "2025-12-23T09:32:31.500806251Z", + "end_time": "2025-12-23T09:32:31.500945356Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:31.500681446Z", + "start_time": "2025-12-23T09:32:31.50078315Z", + "end_time": "2025-12-23T09:32:31.500865453Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:31.500256729Z", + "publish_time": "2025-12-23T09:32:31.500371333Z", + "first_worker_start": 
"2025-12-23T09:32:31.500855153Z", + "last_worker_end": "2025-12-23T09:32:31.570307Z", + "total_journey_time_ms": 70, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:31.500816851Z", + "start_time": "2025-12-23T09:32:31.500855153Z", + "end_time": "2025-12-23T09:32:31.500882854Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:31.501044Z", + "start_time": "2025-12-23T09:32:31.501183Z", + "end_time": "2025-12-23T09:32:31.570307Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 69 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:31.500862353Z", + "start_time": "2025-12-23T09:32:31.500898654Z", + "end_time": "2025-12-23T09:32:31.500944356Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:31.500831552Z", + "start_time": "2025-12-23T09:32:31.500879154Z", + "end_time": "2025-12-23T09:32:31.500904055Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 157, + "min_processing_ms": 69, + "max_processing_ms": 88, + "avg_processing_ms": 78, + "median_processing_ms": 88, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + 
"worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3294, + "slowest_section_id": 0, + "slowest_section_time_ms": 90 + } +} diff --git a/data/output/0010b7fef950827b8191f7a11e09532e4ec8e323.json b/data/output/0010b7fef950827b8191f7a11e09532e4ec8e323.json new file mode 100644 index 0000000..f85bccc --- /dev/null +++ b/data/output/0010b7fef950827b8191f7a11e09532e4ec8e323.json @@ -0,0 +1,270 @@ +{ + "file_name": "0010b7fef950827b8191f7a11e09532e4ec8e323.txt", + "total_words": 667, + "top_n_words": [ + { + "word": "the", + "count": 33 + }, + { + "word": "to", + "count": 18 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "of", + "count": 13 + }, + { + "word": "rugby", + "count": 13 + }, + { + "word": "he", + "count": 12 + }, + { + "word": "new", + "count": 12 + }, + { + "word": "s", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "'For that we will be eternally grateful to him.", + "length": 47 + }, + { + "text": "I'm looking forward to getting out and playing some good footy.", + "length": 63 + }, + { + "text": "We look forward to having him in New Zealand for one final season.", + "length": 66 + }, + { + "text": "2015 is going to be a big year and I'm looking forward to getting into it.", + "length": 74 + }, + { + "text": "'Having said that, it's the immediate future that is exciting me at the moment.", + "length": 79 + }, + { + "text": "He has broken several records along the way, including amassing an incredible 1,457 points.", + "length": 91 + }, + { + "text": "'Dan has been incredibly loyal to New Zealand Rugby over the years and for that we thank him.", + "length": 93 + }, + { + "text": "Carter has only recently returned to 
action after he broke his leg playing for the Canterbury Crusaders .", + "length": 105 + }, + { + "text": "All Blacks centre Ma'a Nonu is also reportedly fielding overseas offers from a number of different clubs .", + "length": 107 + }, + { + "text": "All Blacks star Daniel Carter will join French club Racing Metro on a three-year deal after the World Cup .", + "length": 107 + }, + { + "text": "' Carter signed a seven-month deal with Perpignan in 2008 but only played five games due to a foot injury .", + "length": 107 + }, + { + "text": "' Carter missed the latter stages of New Zealand's 2011 World Cup triumph after he sustained a groin injury .", + "length": 109 + }, + { + "text": "' The All Blacks fly half burst onto the scene when he starred in his side's 3-0 whitewash of the Lions in 2005 .", + "length": 113 + }, + { + "text": "The 32-year-old fly-half's announcement confirms a second stint in France, having previously played for Perpignan.", + "length": 114 + }, + { + "text": "'On behalf of the All Blacks, I want to congratulate Dan on his decision and also on his career to date,' he said.", + "length": 114 + }, + { + "text": "He said at a media conference in Auckland on Thursday: 'It's going to be an awesome adventure for me and my family.", + "length": 115 + }, + { + "text": "New Zealand star Dan Carter will join wealthy French club Racing Metro at the end of 2015 on a three-year contract.", + "length": 115 + }, + { + "text": "The deal, which could be worth as much as £1million per year will bring the curtain down on Carter's stellar Test career.", + "length": 122 + }, + { + "text": "Carter made his All Blacks debut in 2003 and has gone on to win 102 caps, one of only five centurions for the national side.", + "length": 124 + }, + { + "text": "'Having visited France many times I know what the French culture and their rugby culture is like and it's something I really love.", + "length": 130 + }, + { + "text": "'My body is feeling really good, I'm training 
really hard and getting some good conditioning in so I can hit the ground running next year.", + "length": 138 + }, + { + "text": "Steve Hansen, who extended his deal as New Zealand head coach earlier this week, also paid tribute to the two-time world rugby player of the year.", + "length": 146 + }, + { + "text": "'Whilst he hasn't finished yet in New Zealand, he has already set the benchmark for first five play, and we look forward to seeing him out on the paddock in 2015.", + "length": 162 + }, + { + "text": "'He is still in the midst of a wonderful career, both as a provincial and Super Rugby player and an All Black, and will leave us as the greatest first five-eighth ever.", + "length": 168 + }, + { + "text": "However, world rugby's leading points scorer and one of the All Blacks' all-time leading players will feature at next autumn's World Cup before linking up with the Top 14 club.", + "length": 176 + }, + { + "text": "'I am sure all New Zealand rugby fans will celebrate Dan's remaining time in New Zealand and he will leave with New Zealand rugby's very best wishes,' said New Zealand Rugby chief executive Steve Tew.", + "length": 200 + }, + { + "text": "'Whilst everyone is aware of his outstanding achievements on the rugby field, it's also important to note that during his career he recommitted to New Zealand at two critical junctures in our rugby history - after the 2007 Rugby World Cup and again following the 2011 tournament.", + "length": 279 + }, + { + "text": "'That decision by Dan and other senior players to re-sign with New Zealand Rugby after 2007 should not be underestimated; he was a key part of a group of players who formed the backbone of the All Blacks over the following years, which ultimately helped the All Blacks to victory at Rugby World Cup 2011.", + "length": 304 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.5102277398109436 + }, + "name_replaced": false, + "performance": { + "file_detection_time": 
"2025-12-23T09:32:32.001191859Z", + "first_section_created": "2025-12-23T09:32:32.001564674Z", + "last_section_published": "2025-12-23T09:32:32.001757081Z", + "all_results_received": "2025-12-23T09:32:32.064137588Z", + "output_generated": "2025-12-23T09:32:32.064312095Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:32.001564674Z", + "publish_time": "2025-12-23T09:32:32.001757081Z", + "first_worker_start": "2025-12-23T09:32:32.00247561Z", + "last_worker_end": "2025-12-23T09:32:32.063266Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:32.002416208Z", + "start_time": "2025-12-23T09:32:32.00247561Z", + "end_time": "2025-12-23T09:32:32.002545513Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:32.00265Z", + "start_time": "2025-12-23T09:32:32.002786Z", + "end_time": "2025-12-23T09:32:32.063266Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:32.00246291Z", + "start_time": "2025-12-23T09:32:32.002551713Z", + "end_time": "2025-12-23T09:32:32.002641217Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:32.00247071Z", + "start_time": "2025-12-23T09:32:32.002534013Z", + "end_time": "2025-12-23T09:32:32.002562414Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3592, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/0010c870d3fc53ea7f2a4a50f6496dc2df17e02f.json b/data/output/0010c870d3fc53ea7f2a4a50f6496dc2df17e02f.json new file mode 100644 index 0000000..bb2007c --- /dev/null +++ b/data/output/0010c870d3fc53ea7f2a4a50f6496dc2df17e02f.json @@ -0,0 +1,354 @@ +{ + "file_name": "0010c870d3fc53ea7f2a4a50f6496dc2df17e02f.txt", + "total_words": 801, + "top_n_words": [ + { + "word": "the", + "count": 61 + }, + { + "word": "in", + "count": 20 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "of", + "count": 17 + }, + { + "word": "and", + "count": 16 + }, + { + "word": "s", + "count": 16 + }, + { + "word": "have", + "count": 14 + }, + { + "word": "to", + "count": 14 + }, + { + "word": "they", + "count": 13 + }, + { + "word": "with", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": ".", + "length": 1 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "\"The game in the U.", + "length": 19 + }, + { + 
"text": "sports marketing practices.", + "length": 27 + }, + { + "text": "\"The commercial deals in the U.", + "length": 31 + }, + { + "text": "\" But Bayern are the year's big winners.", + "length": 40 + }, + { + "text": "cannot compete with the European market.", + "length": 40 + }, + { + "text": "\" Munich mourns as Bayern blow big chance .", + "length": 43 + }, + { + "text": "Click here to see football's top 10 brands .", + "length": 44 + }, + { + "text": "The MLS is still largely only shown in the U.", + "length": 45 + }, + { + "text": "is still developing massively,\" said Chattaway.", + "length": 47 + }, + { + "text": "\"The revenues are a fraction of those in Europe.", + "length": 48 + }, + { + "text": "The Premier League clubs are benefiting from that.", + "length": 50 + }, + { + "text": "\"There is scope for football clubs to learn from U.", + "length": 51 + }, + { + "text": "They have brought their expertise into the sports industry.", + "length": 59 + }, + { + "text": "\"They have got really strong links with strong German brands.", + "length": 61 + }, + { + "text": "\"It still generates the most money because of the broadcast rights.", + "length": 67 + }, + { + "text": "Chelsea, by comparison, made fifth place with a value of $398 million.", + "length": 70 + }, + { + "text": "\"They have both had relatively successful years on the pitch,\" he said.", + "length": 71 + }, + { + "text": "\"The eurozone crisis has really impacted the capital in Spain and Italy.", + "length": 72 + }, + { + "text": ", it hasn't really expanded globally as quickly as we would have expected.", + "length": 74 + }, + { + "text": "\"Bayern have long-term deals, they have been with Adidas for over 10 years.", + "length": 75 + }, + { + "text": "\"If you look at Bayern Munich, they are a domestic powerhouse,\" said Chattaway.", + "length": 79 + }, + { + "text": "\"Within Europe, the Premier League is still the pinnacle,\" explained Chattaway.", + "length": 79 + }, + { + "text": "The 
games are rarely sold out and the grounds themselves have quite a small capacity.", + "length": 85 + }, + { + "text": "\"Manchester United have got a global reach,\" Brand Finance's head of sports brands Dave Chattaway told CNN.", + "length": 107 + }, + { + "text": "\"It is much more attractive to a foreign audience than the German Bundesliga or the Spanish First Division.", + "length": 107 + }, + { + "text": "It's not necessarily something they are doing wrong commercially, it's a sign of the economy they operate in.", + "length": 109 + }, + { + "text": "But English giants Manchester United lead the way, as in 2011, with a brand estimated to be worth $853 million.", + "length": 111 + }, + { + "text": "\"United have got quite a professional setup, with people who have worked for Pepsi, Disney, all different kinds of marketing industries.", + "length": 136 + }, + { + "text": "\"The clubs need to better understand the brand asset that they own so that they can ensure they get the right returns on all commercial deals.", + "length": 142 + }, + { + "text": "They generate the highest commercial revenue and they are able to negotiate the highest possible deals based on their dominance of the German market.", + "length": 149 + }, + { + "text": "Behind United and Bayern are the Spanish \"El Clasico\" rivals of Real Madrid, third with a value of $600 million, and Barcelona, with a brand worth $580 million.", + "length": 160 + }, + { + "text": "Despite the defeat by Chelsea and having been beaten to the German league and cup by Borussia Dortmund, the club's brand value grew by 59% over the last 12 months.", + "length": 163 + }, + { + "text": "\" With such a huge gap between the sport's most lucrative brands and those with less commercial appeal, are football clubs making the most of their financial potential?", + "length": 168 + }, + { + "text": "Former England captain David Beckham and his Los Angeles Galaxy teammates enjoyed a landmark year in 2011, being crowned Major 
League Soccer champions for a third time.", + "length": 168 + }, + { + "text": "Despite their success, and despite boasting one of football's most recognizable and marketable stars, the Galaxy ($46 million) only crept onto the list in 50th position.", + "length": 169 + }, + { + "text": "\" The list took into account various revenue streams for clubs, such as ticket sales, merchandising, sponsorship deals and money received from the sale of broadcasting rights.", + "length": 175 + }, + { + "text": "\"I think there is more scope for all the clubs to further maximize value -- clubs have traditionally been slow and unimaginative in monetizing the brand they own,\" Chattaway said.", + "length": 179 + }, + { + "text": "AC Milan ($292 million) placed ninth on the list after winning the title in 2010-11, while this season's champions Juventus ($160 million) fell from 10th in 2011 to 16th this year.", + "length": 180 + }, + { + "text": "\" Italian Cup winners Napoli ($85 million) were the only Serie A club to increase brand value, coming off the back of a relatively successful Champions League campaign, to be 22nd overall.", + "length": 188 + }, + { + "text": "Despite Saturday's crushing penalty shootout loss to the English side in their own backyard at the the Allianz Arena, the Bavarians have been ranked as the second most valuable brand in football.", + "length": 195 + }, + { + "text": "According to a new report by independent consultancy Brand Finance, which has compiled a list of the 50 biggest brands in the sport, the four-time European champions have been valued at $786 million.", + "length": 199 + }, + { + "text": "\" The top 10 is dominated by teams from the English Premier League, with United and Chelsea followed by 2011-12 title winners Manchester City in eighth ($302 million) Torres unsure of Chelsea future .", + "length": 200 + }, + { + "text": "Spanish champions Real and 2011 European champions Barca have seen similar decreases in the value of their brands, 7% 
and 8% respectively, which Chattaway puts down to the country's current economic plight.", + "length": 206 + }, + { + "text": "(CNN) -- Bayern Munich might be licking their wounds after defeat in the European Champions League final, but the German club can find comfort in victory of a different kind: by beating Chelsea in football's financial league table.", + "length": 231 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5554896593093872 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:32.502538705Z", + "first_section_created": "2025-12-23T09:32:32.504334577Z", + "last_section_published": "2025-12-23T09:32:32.504623389Z", + "all_results_received": "2025-12-23T09:32:32.563010535Z", + "output_generated": "2025-12-23T09:32:32.563167842Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:32.504334577Z", + "publish_time": "2025-12-23T09:32:32.504623389Z", + "first_worker_start": "2025-12-23T09:32:32.50515161Z", + "last_worker_end": "2025-12-23T09:32:32.562078Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:32.505165511Z", + "start_time": "2025-12-23T09:32:32.505279115Z", + "end_time": "2025-12-23T09:32:32.505382519Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:32.505362Z", + "start_time": "2025-12-23T09:32:32.50551Z", + "end_time": "2025-12-23T09:32:32.562078Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:32.505098908Z", + "start_time": "2025-12-23T09:32:32.505207312Z", + "end_time": "2025-12-23T09:32:32.505306216Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:32.505070807Z", + "start_time": "2025-12-23T09:32:32.50515161Z", + "end_time": "2025-12-23T09:32:32.505206212Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4765, + "slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/00120f91cfcab17bac165f7a4719019a628a9db3.json b/data/output/00120f91cfcab17bac165f7a4719019a628a9db3.json new file mode 100644 index 0000000..1c99e37 --- /dev/null +++ b/data/output/00120f91cfcab17bac165f7a4719019a628a9db3.json @@ -0,0 +1,226 @@ +{ + "file_name": "00120f91cfcab17bac165f7a4719019a628a9db3.txt", + "total_words": 273, + "top_n_words": [ + { + "word": "a", + "count": 17 + }, + { + "word": "the", + "count": 13 + }, + { + "word": "and", + "count": 8 + }, + { + 
"word": "of", + "count": 7 + }, + { + "word": "s", + "count": 6 + }, + { + "word": "to", + "count": 6 + }, + { + "word": "gadhafi", + "count": 5 + }, + { + "word": "where", + "count": 5 + }, + { + "word": "in", + "count": 4 + }, + { + "word": "it", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "Pieces of metal and shrapnel.", + "length": 29 + }, + { + "text": "\"There is literally a city under here.", + "length": 38 + }, + { + "text": "She dubbed it \"Gadhafi's inner sanctum.", + "length": 39 + }, + { + "text": "A section where NATO bombs fell and the roof caved in.", + "length": 54 + }, + { + "text": "CNN's Sara Sidner got a peek at the passageways Friday.", + "length": 55 + }, + { + "text": "She spotted a golf cart that can easily fit in the corridors.", + "length": 61 + }, + { + "text": "A charred ceiling, couches and beds where a fire apparently occurred.", + "length": 69 + }, + { + "text": "\"It's set up like a survival bunker,\" Sidner said in an on-air report.", + "length": 70 + }, + { + "text": "The tunnels Sidner saw are wide enough for adults to walk side by side.", + "length": 71 + }, + { + "text": "\" So far, she said, rebels have cleared about 700 meters of underground passages.", + "length": 81 + }, + { + "text": "That's where 33 journalists and two foreign nationals were held for five days by pro-Gadhafi forces.", + "length": 100 + }, + { + "text": "The tunnel network is believed to extend all the way to the city's international airport and the Rixos hotel.", + "length": 109 + }, + { + "text": "Another room contained videotapes lined up on a shelf, part of a TV studio where Gadhafi may have recorded messages.", + "length": 116 + }, + { + "text": "Sidner also saw a range of other sights as she strolled through the labyrinth: A thick wall, a massive door and a sturdy lock.", + "length": 126 + }, + { + "text": "It also is thought to extend to a neighborhood where Gadhafi forces were lobbing shells recently toward the compound after it was 
taken over by the rebels.", + "length": 155 + }, + { + "text": "\" The correspondent, who's been covering the battle of Tripoli, walked down steps into a pitch-dark tunnel and used a flashlight to navigate an underworld described as \"massive.", + "length": 177 + }, + { + "text": "Tripoli, Libya (CNN) -- Rebels in Tripoli furiously hunting for signs of longtime Libyan leader Moammar Gadhafi are exploring a network of tunnels and bunkers built beneath his massive compound.", + "length": 194 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4851723611354828 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:33.005418713Z", + "first_section_created": "2025-12-23T09:32:33.005802029Z", + "last_section_published": "2025-12-23T09:32:33.006016937Z", + "all_results_received": "2025-12-23T09:32:33.072825222Z", + "output_generated": "2025-12-23T09:32:33.072949527Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:33.005802029Z", + "publish_time": "2025-12-23T09:32:33.006016937Z", + "first_worker_start": "2025-12-23T09:32:33.006517857Z", + "last_worker_end": "2025-12-23T09:32:33.071968Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:33.006529958Z", + "start_time": "2025-12-23T09:32:33.00658206Z", + "end_time": "2025-12-23T09:32:33.006612961Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:33.006741Z", + "start_time": "2025-12-23T09:32:33.006895Z", + "end_time": "2025-12-23T09:32:33.071968Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:33.006441254Z", + "start_time": 
"2025-12-23T09:32:33.006517857Z", + "end_time": "2025-12-23T09:32:33.006558859Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:33.006476956Z", + "start_time": "2025-12-23T09:32:33.006541758Z", + "end_time": "2025-12-23T09:32:33.00658096Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1559, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/0012469ec896efd985de21eadb4504a8a22097fd.json b/data/output/0012469ec896efd985de21eadb4504a8a22097fd.json new file mode 100644 index 0000000..62b4407 --- /dev/null +++ b/data/output/0012469ec896efd985de21eadb4504a8a22097fd.json @@ -0,0 +1,310 @@ +{ + "file_name": "0012469ec896efd985de21eadb4504a8a22097fd.txt", + "total_words": 477, + "top_n_words": [ + { + "word": 
"a", + "count": 19 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "the", + "count": 14 + }, + { + "word": "in", + "count": 12 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "barbie", + "count": 11 + }, + { + "word": "to", + "count": 11 + }, + { + "word": "s", + "count": 10 + }, + { + "word": "entrepreneur", + "count": 7 + }, + { + "word": "mattel", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "case.", + "length": 5 + }, + { + "text": "More...", + "length": 7 + }, + { + "text": "president.", + "length": 10 + }, + { + "text": "Erin Clements .", + "length": 15 + }, + { + "text": "about this role.", + "length": 16 + }, + { + "text": "growing in number.", + "length": 18 + }, + { + "text": "99, according to CNN.", + "length": 21 + }, + { + "text": "modern-day careers for women.", + "length": 29 + }, + { + "text": "Fancy a Slash of milk with that?", + "length": 32 + }, + { + "text": "' It's the first time in her 55-year .", + "length": 38 + }, + { + "text": "a reflection of the times,' she added.", + "length": 38 + }, + { + "text": "'We always try to make career Barbie .", + "length": 38 + }, + { + "text": "This gives a new meaning to face painting!", + "length": 42 + }, + { + "text": "[It's] a great way to encourage girls to also learn .", + "length": 53 + }, + { + "text": "'Women entrepreneurs are more prevalent now and they’re .", + "length": 59 + }, + { + "text": "And then we wonder why girls don’t pursue careers in tech!", + "length": 60 + }, + { + "text": "Artist creates portraits of celebrities - using cornflakes .", + "length": 60 + }, + { + "text": "Barbie Entrepreneur comes in four ethnicities and will retail for $12.", + "length": 70 + }, + { + "text": "Think pink: Each version of Entrepreneur Barbie sports a fuchsia frock .", + "length": 72 + }, + { + "text": "And ever the Renaissance woman, the leggy blond doll has found a new calling: 
entrepreneur.", + "length": 91 + }, + { + "text": "'When you ask a girl what a computer scientist is, she usually pictures a geeky guy typing away.", + "length": 96 + }, + { + "text": "' Tools of the trade: Entrepreneur Barbie's accessories include a tablet, smartphone and laptop case .", + "length": 102 + }, + { + "text": "Make-up artist who transforms herself into celebrities, including footballers from the World Cup Squad .", + "length": 104 + }, + { + "text": "Since making her 1959 debut, Barbie has done stints as an astronaut, a surgeon, a news anchor and even a U.", + "length": 107 + }, + { + "text": "'She's ready to take on anything that comes her way,' Mattel spokeswoman Michelle Chidoni said earlier this year.", + "length": 113 + }, + { + "text": "history that Barbie has taken on the role of business owner, where women are 'living their dreams their own ways.", + "length": 113 + }, + { + "text": "We have to change popular culture and start showing more women, more cool, dynamic, creative women, in these roles.", + "length": 115 + }, + { + "text": "' Chidoni said Mattel is also partnering with the Girl Scouts to create video stories of successful female entrepreneurs.", + "length": 121 + }, + { + "text": "Barbie will also get her own LinkedIn page and a billboard in New York's Times Square featuring the hashtag #unapologetic.", + "length": 122 + }, + { + "text": "Entrepreneur Barbie is part of Mattel's 'I can be' line of career-oriented Barbies, which launched in 2010 in an effort to showcase .", + "length": 134 + }, + { + "text": "The campaign's participants offered career tips to young fans in a Twitter discussion Wednesday afternoon, using the hashtag #BarbieChat.", + "length": 137 + }, + { + "text": "Well connected: Entrepreneur Barbie, part of Mattel's 'I can be' line of career-oriented dolls, comes with her own smartphone and tablet .", + "length": 139 + }, + { + "text": "Mattel has teamed with eight female entrepreneurs to launch the tech-savvy doll, 
who comes equipped with a tablet, a smartphone and a laptop .", + "length": 142 + }, + { + "text": "'Unfortunately we live in a culture where girls are bombarded with images of male coders and engineers that just don’t look like them,' Saujani told Wired.", + "length": 157 + }, + { + "text": "Entrepreneur Barbie, which Mattel unveiled at the American International Toy Fair in February, is available on Amazon starting today, and will hit other retailers in August.", + "length": 173 + }, + { + "text": "Barbie's 'Chief Inspirational Officers' include Girls Who Code founder Reshma Saujani, One Kings Lane founders Susan Feldman and Alison Pincus, and Rent the Runway founders Jennifer Hyman and Jenny Fleiss.", + "length": 205 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.45161816477775574 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:33.506814962Z", + "first_section_created": "2025-12-23T09:32:33.507180076Z", + "last_section_published": "2025-12-23T09:32:33.507409886Z", + "all_results_received": "2025-12-23T09:32:33.570953939Z", + "output_generated": "2025-12-23T09:32:33.571093545Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:33.507180076Z", + "publish_time": "2025-12-23T09:32:33.507409886Z", + "first_worker_start": "2025-12-23T09:32:33.507882305Z", + "last_worker_end": "2025-12-23T09:32:33.570048Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:33.507919106Z", + "start_time": "2025-12-23T09:32:33.507981009Z", + "end_time": "2025-12-23T09:32:33.508044011Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:33.508124Z", + "start_time": 
"2025-12-23T09:32:33.508295Z", + "end_time": "2025-12-23T09:32:33.570048Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:33.507922406Z", + "start_time": "2025-12-23T09:32:33.507990909Z", + "end_time": "2025-12-23T09:32:33.508059712Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:33.507830403Z", + "start_time": "2025-12-23T09:32:33.507882305Z", + "end_time": "2025-12-23T09:32:33.507926806Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2892, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/00128f1ba30d5e9e0f17df83285a1bc2072e2f01.json b/data/output/00128f1ba30d5e9e0f17df83285a1bc2072e2f01.json new file mode 100644 
index 0000000..e36b83b --- /dev/null +++ b/data/output/00128f1ba30d5e9e0f17df83285a1bc2072e2f01.json @@ -0,0 +1,436 @@ +{ + "file_name": "00128f1ba30d5e9e0f17df83285a1bc2072e2f01.txt", + "total_words": 1091, + "top_n_words": [ + { + "word": "the", + "count": 72 + }, + { + "word": "to", + "count": 41 + }, + { + "word": "of", + "count": 40 + }, + { + "word": "and", + "count": 31 + }, + { + "word": "in", + "count": 26 + }, + { + "word": "s", + "count": 25 + }, + { + "word": "a", + "count": 22 + }, + { + "word": "that", + "count": 21 + }, + { + "word": "said", + "count": 20 + }, + { + "word": "cheney", + "count": 18 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "But Rep.", + "length": 8 + }, + { + "text": "\"Stuff happens.", + "length": 15 + }, + { + "text": "national security.", + "length": 18 + }, + { + "text": "ambassador to Iraq.", + "length": 19 + }, + { + "text": "troops was in its \"last throes.", + "length": 31 + }, + { + "text": "\"I had my say,\" Cheney told CNN.", + "length": 32 + }, + { + "text": "There's been elections, a constitution.", + "length": 39 + }, + { + "text": "negotiator with North Korea, to be the U.", + "length": 41 + }, + { + "text": "Watch Cheney tout Bush administration » .", + "length": 42 + }, + { + "text": "\"We're going to recover, because we're Americans.", + "length": 49 + }, + { + "text": "\"I got my chance to voice my views and my objections.", + "length": 53 + }, + { + "text": "\" In 2005, Cheney said the raging insurgency against U.", + "length": 55 + }, + { + "text": "list of state sponsors of terrorism as part of the deal.", + "length": 56 + }, + { + "text": "\" \"I think it's a choice that I wouldn't have made,\" he said.", + "length": 61 + }, + { + "text": "wind\" by refusing to issue Libby a 
pardon before leaving office.", + "length": 64 + }, + { + "text": "And the administration has to be able to respond to that, and we did.", + "length": 69 + }, + { + "text": "Since taking office in January, Obama has announced plans to close the U.", + "length": 73 + }, + { + "text": "\" asked Sestak, a former admiral who led the Navy's anti-terrorism efforts.", + "length": 75 + }, + { + "text": "They're about to have another presidential election here in the near future.", + "length": 76 + }, + { + "text": "\"The number of casualties [among] Iraqis and Americans is significantly diminished.", + "length": 83 + }, + { + "text": "\"The cost of this war is something that I strongly believe has far, far hurt us,\" he said.", + "length": 90 + }, + { + "text": "But Iraq was just one piece of our security, and this administration failed to realize that.", + "length": 92 + }, + { + "text": "None of those weapons were found after the invasion, but Cheney said, \"We've eliminated that possibility.", + "length": 105 + }, + { + "text": "\"I don't hear much talk about that, but the fact is, the violence level is down 90 percent,\" Cheney said.", + "length": 105 + }, + { + "text": "\" The Obama administration has nominated Christopher Hill, the State Department official who was the top U.", + "length": 107 + }, + { + "text": "\"We've accomplished nearly everything we set out to do,\" ex-Vice President Dick Cheney says Sunday about Iraq.", + "length": 110 + }, + { + "text": "\" And Cheney said he argued against the administration's policy on North Korea, which tested a nuclear weapon in 2006.", + "length": 118 + }, + { + "text": "\"When you go back to the law enforcement mode, which I sense is what they're doing, closing Guantanamo and so forth ...", + "length": 119 + }, + { + "text": "I didn't think the North Koreans were going to keep their end of the bargain in terms of what they agreed to, and they didn't.", + "length": 126 + }, + { + "text": "He would disclose no details of 
his efforts to lobby the president on Libby's behalf, saying they would be \"best left to history.", + "length": 129 + }, + { + "text": "\" Obama has begun to wind down the war in Iraq, which has cost more than 4,200 American lives and nearly $700 billion in direct costs.", + "length": 134 + }, + { + "text": "The Bush administration reached a still-incomplete disarmament deal with the isolated Stalinist state in 2007 and removed it from the U.", + "length": 136 + }, + { + "text": "Libby was convicted of perjury, obstruction of justice, and lying to federal agents investigating the leak of a former CIA officer's identity.", + "length": 142 + }, + { + "text": "\"I believe firmly that Scooter was unjustly accused and prosecuted and deserved a pardon, and the president disagreed with that,\" Cheney said.", + "length": 142 + }, + { + "text": "\" Though considered one of the administration's most influential figures, Cheney said President Bush rebuffed his advice on at least two issues.", + "length": 144 + }, + { + "text": "Joe Sestak, D-Pennsylvania, said the Bush administration's policies undercut \"what is actually the source of America's greatness -- our principles.", + "length": 147 + }, + { + "text": "\" But Sestak said the administration was too slow to react to the problems it faced in Iraq and let the conflict overshadow the \"whole fabric\" of U.", + "length": 148 + }, + { + "text": "\"All of these things required us to spend money that we had not originally planned to spend, or weren't originally part of the budget,\" Cheney said.", + "length": 148 + }, + { + "text": "Cheney said Hill lacks the Middle East experience that his predecessors have, and \"I did not support the work that Chris Hill did with respect to North Korea.", + "length": 158 + }, + { + "text": "Cheney said the administration appears to be returning to the pre-2001 model of treating terrorism as a law enforcement issue, rather than a military problem.", + "length": 158 + }, + { + "text": "We 
have succeeded in creating in the heart of the Middle East a democratically governed Iraq, and that is a big deal, and it is, in fact, what we set out to do.", + "length": 160 + }, + { + "text": "But Cheney said the United States has \"accomplished nearly everything we set out to do\" in Iraq, including establishing a democratic government in the Middle East.", + "length": 163 + }, + { + "text": "\" \"How can we say that keeping a man in a black hole forever -- perpetually in a black hole -- and saying, 'Let's torture when we decide to,' is what America stands for?", + "length": 169 + }, + { + "text": "prison camp at Guantanamo Bay, Cuba, to halt the military trials of suspected terrorists there, and to make CIA officers follow the Army field manual's rules on interrogations.", + "length": 176 + }, + { + "text": "The Bush administration took office at the end of an economic boom and left in the middle of a deep recession, with a budget surplus in 2001 becoming a $1 trillion-plus deficit by 2009.", + "length": 185 + }, + { + "text": "\"President Obama campaigned against it all across the country, and now he is making some choices that, in my mind, will, in fact, raise the risk to the American people of another attack,\" he said.", + "length": 196 + }, + { + "text": "\" Nearly two years later, a commitment of more than 30,000 additional American troops and a widespread effort to pay former insurgents to turn against Islamic militants helped quell the worst of the violence.", + "length": 208 + }, + { + "text": "they are very much giving up that center of attention and focus that's required, that concept of military threat that is essential if you're going to successfully defend the nation against further attacks,\" he said.", + "length": 215 + }, + { + "text": "Cheney was one of the administration's leading advocates of the 2003 invasion of Iraq, pressing the Bush administration's argument that Iraq was concealing weapons of mass destruction and could provide those 
weapons to terrorists.", + "length": 230 + }, + { + "text": "But Cheney said he and Bush had to spend money to deal with the September 11, 2001 attacks, the resulting war in Afghanistan, the disaster of 2005's Hurricane Katrina, and the costly and unpopular war in Iraq, now nearly six years old.", + "length": 235 + }, + { + "text": "WASHINGTON (CNN) -- Former Vice President Dick Cheney on Sunday defended the Bush administration's economic record, the invasion of Iraq and the treatment of suspected terrorists, warning that reversing its anti-terrorism policies endangers Americans.", + "length": 251 + }, + { + "text": "Critics said the Bush administration's \"alternative\" interrogation techniques amounted to the torture of prisoners in American custody, while the administration's warrantless surveillance program violated federal laws enacted after the Watergate scandal.", + "length": 254 + }, + { + "text": "In a wide-ranging interview with CNN's \"State of the Union,\" Cheney said the harsh interrogations of suspects and the use of warrantless electronic surveillance were \"absolutely essential\" to get information to prevent more attacks like the 2001 suicide hijackings that targeted New York and Washington.", + "length": 303 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7293055653572083 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:34.008172909Z", + "first_section_created": "2025-12-23T09:32:34.008599926Z", + "last_section_published": "2025-12-23T09:32:34.009079345Z", + "all_results_received": "2025-12-23T09:32:34.120086606Z", + "output_generated": "2025-12-23T09:32:34.120283114Z", + "total_processing_time_ms": 112, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 111, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:34.008599926Z", + "publish_time": "2025-12-23T09:32:34.008881937Z", + "first_worker_start": 
"2025-12-23T09:32:34.009489462Z", + "last_worker_end": "2025-12-23T09:32:34.117709Z", + "total_journey_time_ms": 109, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:34.009582965Z", + "start_time": "2025-12-23T09:32:34.009658268Z", + "end_time": "2025-12-23T09:32:34.009765173Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:34.009916Z", + "start_time": "2025-12-23T09:32:34.010068Z", + "end_time": "2025-12-23T09:32:34.117709Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 107 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:34.009399258Z", + "start_time": "2025-12-23T09:32:34.009489462Z", + "end_time": "2025-12-23T09:32:34.009604566Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:34.009424759Z", + "start_time": "2025-12-23T09:32:34.009528663Z", + "end_time": "2025-12-23T09:32:34.009614767Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:34.008938239Z", + "publish_time": "2025-12-23T09:32:34.009079345Z", + "first_worker_start": "2025-12-23T09:32:34.009580165Z", + "last_worker_end": "2025-12-23T09:32:34.081067Z", + "total_journey_time_ms": 72, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:34.009605266Z", + "start_time": "2025-12-23T09:32:34.009682769Z", + "end_time": "2025-12-23T09:32:34.009716071Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:34.009811Z", + "start_time": "2025-12-23T09:32:34.009952Z", + "end_time": "2025-12-23T09:32:34.081067Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 71 + }, + "topn": { + "worker_type": "topn", + 
"receive_time": "2025-12-23T09:32:34.009543064Z", + "start_time": "2025-12-23T09:32:34.009580165Z", + "end_time": "2025-12-23T09:32:34.009628867Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:34.009663669Z", + "start_time": "2025-12-23T09:32:34.00969977Z", + "end_time": "2025-12-23T09:32:34.009711371Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 178, + "min_processing_ms": 71, + "max_processing_ms": 107, + "avg_processing_ms": 89, + "median_processing_ms": 107, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3200, + "slowest_section_id": 0, + "slowest_section_time_ms": 109 + } +} diff --git a/data/output/0012c075cf01cbdc30895f7a1568cf556225c0c3.json b/data/output/0012c075cf01cbdc30895f7a1568cf556225c0c3.json new file mode 100644 index 0000000..0e92d3e --- /dev/null +++ b/data/output/0012c075cf01cbdc30895f7a1568cf556225c0c3.json @@ -0,0 +1,466 @@ +{ + "file_name": 
"0012c075cf01cbdc30895f7a1568cf556225c0c3.txt", + "total_words": 676, + "top_n_words": [ + { + "word": "the", + "count": 32 + }, + { + "word": "a", + "count": 21 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "her", + "count": 15 + }, + { + "word": "jones", + "count": 15 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "with", + "count": 11 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "she", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "She .", + "length": 5 + }, + { + "text": "year.", + "length": 5 + }, + { + "text": "to do.", + "length": 6 + }, + { + "text": "Sarah .", + "length": 7 + }, + { + "text": "school.", + "length": 7 + }, + { + "text": "Victory?", + "length": 8 + }, + { + "text": "TheDirty.", + "length": 9 + }, + { + "text": "com’s .", + "length": 9 + }, + { + "text": "A former .", + "length": 10 + }, + { + "text": "She also .", + "length": 10 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "at $30,000.", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "the Internet.", + "length": 13 + }, + { + "text": "Bengals players.", + "length": 16 + }, + { + "text": "Sordid: TheDirty.", + "length": 17 + }, + { + "text": "Jones was accused .", + "length": 19 + }, + { + "text": "com would have the .", + "length": 20 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "10:32 EST, 10 May 2012 .", + "length": 24 + }, + { + "text": "10:32 EST, 10 May 2012 .", + "length": 24 + }, + { + "text": "also be central to the case.", + "length": 28 + }, + { + "text": "pleaded not guilty in April.", + "length": 28 + }, + { + "text": "She was awarded an $11million .", + "length": 31 + }, + { + "text": "Her mother Cheryl, 55, a middle .", + "length": 33 + }, + { + "text": "' Jones sued Nik Richie, 
TheDirty.", + "length": 34 + }, + { + "text": "founder, for character defamation.", + "length": 34 + }, + { + "text": "Computers belonging to Cheryl and .", + "length": 35 + }, + { + "text": "It is believed text messages will .", + "length": 35 + }, + { + "text": "Jones, a five-year veteran of the .", + "length": 35 + }, + { + "text": "In the civil defamation case being .", + "length": 36 + }, + { + "text": "seized as part of the investigation.", + "length": 36 + }, + { + "text": "Judge William Bertelsman at the 6th U.", + "length": 38 + }, + { + "text": "When she emailed the site to plead they .", + "length": 41 + }, + { + "text": "Appearing on ABC at the time, she said: .", + "length": 41 + }, + { + "text": "She is scheduled to stand trial on June 27.", + "length": 43 + }, + { + "text": "Judge Patricia Summe reduced both to $15,000.", + "length": 45 + }, + { + "text": "brought by Jones, the judge said that TheDirty.", + "length": 47 + }, + { + "text": "default judgement but the litigation is ongoing.", + "length": 48 + }, + { + "text": "charges of first-degree sexual abuse last month.", + "length": 48 + }, + { + "text": "induce a minor to engage in sexual activities, a .", + "length": 50 + }, + { + "text": "charge of tampering with evidence in her daughter's case.", + "length": 57 + }, + { + "text": "Her estranged husband has chosen to file for an annulment.", + "length": 58 + }, + { + "text": "However the website's lawyer David Gingras told Cincinnati.", + "length": 59 + }, + { + "text": "said the claims cost her reputation with colleagues at the .", + "length": 60 + }, + { + "text": "Bengal cheerleader and teacher Sarah Jones has sued TheDirty.", + "length": 61 + }, + { + "text": "Cincinnati Bengals cheerleader accused of having sex with a .", + "length": 61 + }, + { + "text": "She pleaded not guilty to unlawful use of electronic means to .", + "length": 63 + }, + { + "text": "charge often reserved for people peddling child pornography over .", + "length": 66 + 
}, + { + "text": "Sarah Jones, as well as those of the alleged victim's family, were .", + "length": 68 + }, + { + "text": "school principal who is now on administrative leave, faces a single .", + "length": 69 + }, + { + "text": "Ben-Gals cheerleading squad, was indicted in Covington, Kentucky on .", + "length": 69 + }, + { + "text": "Jones's bond was previously set at $50,000 while her mother's was set .", + "length": 71 + }, + { + "text": "'To stand in front of 30 15-year-olds and tell them you don't have two .", + "length": 72 + }, + { + "text": "take photographs down, more posts appeared claiming she had had sex with .", + "length": 74 + }, + { + "text": "STDs and that you are not a slut is the hardest thing you will ever have .", + "length": 74 + }, + { + "text": "Sarah Jones, 26, has brought a lawsuit against the gossip website TheDirty.", + "length": 75 + }, + { + "text": "right to probe the allegations being made against her in the criminal trial.", + "length": 76 + }, + { + "text": "com posted the gossip about Jones in 2009 and was sued by the former teacher .", + "length": 78 + }, + { + "text": "Mr Gringas said he planned to grill Jones about the sexual allegations against her.", + "length": 83 + }, + { + "text": "of sleeping with a 16-year-old football player while teaching in a Kentucky school last .", + "length": 90 + }, + { + "text": "16-year-old male student will be allowed to sue a website for defamation, a court has ruled.", + "length": 92 + }, + { + "text": "She resigned from her job teaching high school English in November citing 'personal reasons'.", + "length": 93 + }, + { + "text": "com after photographs and postings were uploaded in 2009 that branded her a 'slut with two STDs'.", + "length": 97 + }, + { + "text": "Then: Jones, 26, was captain of the Ben-Gals between 2011-2012 and a five-year veteran of the squad .", + "length": 101 + }, + { + "text": "Accused: Her mother Cheryl, left, has also been accused of tampering with evidence in her 
daughter's case .", + "length": 107 + }, + { + "text": "com that the court's decision 'is not the result I wanted to see, but it is actually a disaster for Jones'.", + "length": 107 + }, + { + "text": "Defying expectations: Former cheerleading captain Sarah Jones (right) pictured on a night out with a friend .", + "length": 109 + }, + { + "text": "Posts claimed she had sex with a boyfriend in her classroom at Dixie Heights High School in Edgewood, Kentucky.", + "length": 111 + }, + { + "text": "Circuit Court of Appeals decided yesterday that the website could not appeal the decision that it was liable to Jones.", + "length": 118 + }, + { + "text": "Booked: Sarah Jones and her mother Cheryl, right, both pleaded not guilty to their charges and will stand trial in June .", + "length": 121 + }, + { + "text": "The indictment states the alleged sexual relationship with the high school student took place between October 1 and December 31.", + "length": 128 + }, + { + "text": "com had hoped it would be shielded by the Communications Decency Act of 1996 (CDA) because it had merely posted information written by others.", + "length": 142 + }, + { + "text": "com for character defamation after it posted photos in 2009 branding her a 'slut' for allegedly sleeping with a student at her Kentucky school .", + "length": 144 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6077812910079956 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:34.509860069Z", + "first_section_created": "2025-12-23T09:32:34.51014468Z", + "last_section_published": "2025-12-23T09:32:34.510494194Z", + "all_results_received": "2025-12-23T09:32:34.582616907Z", + "output_generated": "2025-12-23T09:32:34.582783014Z", + "total_processing_time_ms": 72, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 72, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:34.51014468Z", + 
"publish_time": "2025-12-23T09:32:34.510494194Z", + "first_worker_start": "2025-12-23T09:32:34.511026616Z", + "last_worker_end": "2025-12-23T09:32:34.581595Z", + "total_journey_time_ms": 71, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:34.511062317Z", + "start_time": "2025-12-23T09:32:34.51112302Z", + "end_time": "2025-12-23T09:32:34.511207323Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:34.511342Z", + "start_time": "2025-12-23T09:32:34.511512Z", + "end_time": "2025-12-23T09:32:34.581595Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 70 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:34.510983714Z", + "start_time": "2025-12-23T09:32:34.511064217Z", + "end_time": "2025-12-23T09:32:34.511190022Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:34.510956713Z", + "start_time": "2025-12-23T09:32:34.511026616Z", + "end_time": "2025-12-23T09:32:34.511054417Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 70, + "min_processing_ms": 70, + "max_processing_ms": 70, + "avg_processing_ms": 70, + "median_processing_ms": 70, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3960, + "slowest_section_id": 0, + "slowest_section_time_ms": 71 + } +} diff --git a/data/output/0012ce2f3c77b0cd8fca9e7347b19328a4ff27d4.json b/data/output/0012ce2f3c77b0cd8fca9e7347b19328a4ff27d4.json new file mode 100644 index 0000000..a482834 --- /dev/null +++ b/data/output/0012ce2f3c77b0cd8fca9e7347b19328a4ff27d4.json @@ -0,0 +1,242 @@ +{ + "file_name": "0012ce2f3c77b0cd8fca9e7347b19328a4ff27d4.txt", + "total_words": 324, + "top_n_words": [ + { + "word": "the", + "count": 16 + }, + { + "word": "service", + "count": 10 + }, + { + "word": "s", + "count": 8 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "for", + "count": 6 + }, + { + "word": "grant", + "count": 6 + }, + { + "word": "britain", + "count": 5 + }, + { + "word": "has", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "C.", + "length": 2 + }, + { + "text": "E.", + "length": 2 + }, + { + "text": "L.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "It suits us.", + "length": 12 + }, + { + "text": "Peter Lloyd .", + "length": 13 + }, + { + "text": "Now he has spoken out I hope we can eventually secure a debate in Parliament.", + "length": 77 + }, + { + "text": "Britain is an ideal nation  for military service because we get on with things.", + "length": 80 + }, + { + "text": "Grant’s father, also called James, trained at Sandhurst and spent eight years in the Army.", + "length": 92 + }, + { + "text": "’ However, an online campaign to prevent the return of National Service has 
35,000 supporters.", + "length": 96 + }, + { + "text": "Hugh Grant has urged Britain to reintroduce National Service, saying:  ‘It goes with our personality.", + "length": 104 + }, + { + "text": "Hero: Major James Grant, Hugh's grandfather, won the Distinguished Service Order for bravery and leadership during the Second World War .", + "length": 137 + }, + { + "text": "He had defied an order to surrender to overwhelming German troops when stranded at St Valery-en-Caux, France, after the 1940 Dunkirk evacuation.", + "length": 144 + }, + { + "text": "’ Although the actor has never served in the  Forces, he paid tribute  to Britain’s military  tradition, which made heroes of his forefathers.", + "length": 149 + }, + { + "text": "Ideal: The actor said Britain is an ideal nation for military service because 'we get on with things' ‘National Service can definitely benefit society.", + "length": 153 + }, + { + "text": "’ Calling for a return to National Service is usually the preserve of Tory politicians, but Grant added: ‘I have left views, but I also have right views.", + "length": 157 + }, + { + "text": "Tory MP Philip Hollobone, who has drawn up a Private Member’s Bill calling for Britain to reintroduce National Service, said: ‘Hugh is clearly a very sensible chap.", + "length": 168 + }, + { + "text": "Speaking at last week’s launch of singer and photographer Bryan Adams’s book Wounded: The Legacy Of War, Grant, 54, said: ‘My father and grandfather both served and it shaped them.", + "length": 186 + }, + { + "text": "But the millionaire actor admitted that ‘probably the closest I’ll get’ to military service is playing a  naval intelligence commander in Guy Ritchie’s forthcoming film The Man From U.", + "length": 193 + }, + { + "text": "’ The actor’s grandfather Major James Murray Grant served with the Seaforth Highlanders and  was awarded a Distinguished Service Order  for bravery and leadership during the Second World War.", + "length": 197 + } + ], + "sentiment": { 
+ "sentiment": "neutral", + "score": 0.4265832304954529 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:35.011500533Z", + "first_section_created": "2025-12-23T09:32:35.013024895Z", + "last_section_published": "2025-12-23T09:32:35.013201402Z", + "all_results_received": "2025-12-23T09:32:35.079735305Z", + "output_generated": "2025-12-23T09:32:35.079886711Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:35.013024895Z", + "publish_time": "2025-12-23T09:32:35.013201402Z", + "first_worker_start": "2025-12-23T09:32:35.013705222Z", + "last_worker_end": "2025-12-23T09:32:35.078741Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:35.013718223Z", + "start_time": "2025-12-23T09:32:35.013771625Z", + "end_time": "2025-12-23T09:32:35.013835928Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:35.013887Z", + "start_time": "2025-12-23T09:32:35.01403Z", + "end_time": "2025-12-23T09:32:35.078741Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:35.01365262Z", + "start_time": "2025-12-23T09:32:35.013707122Z", + "end_time": "2025-12-23T09:32:35.013769025Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:35.01363972Z", + "start_time": "2025-12-23T09:32:35.013705222Z", + "end_time": "2025-12-23T09:32:35.013729623Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1982, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/0012f36726895988fe021995c2e649ff76c48639.json b/data/output/0012f36726895988fe021995c2e649ff76c48639.json new file mode 100644 index 0000000..0ef37e6 --- /dev/null +++ b/data/output/0012f36726895988fe021995c2e649ff76c48639.json @@ -0,0 +1,222 @@ +{ + "file_name": "0012f36726895988fe021995c2e649ff76c48639.txt", + "total_words": 333, + "top_n_words": [ + { + "word": "the", + "count": 21 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "energy", + "count": 6 + }, + { + "word": "for", + "count": 6 + }, + { + "word": "is", + "count": 6 + }, + { + "word": "as", + "count": 5 + }, + { + "word": "charge", + "count": 5 + }, + { + "word": "solar", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "The base of the gadget conceals a battery that 
stores the solar energy.", + "length": 71 + }, + { + "text": "Bonsai is a traditional Japanese art form in which trees are grown in miniature pots.", + "length": 85 + }, + { + "text": "Its leaves might not be green, but its potential for energy efficiency most certainly is.", + "length": 89 + }, + { + "text": "The designer is waiting to receive 400 presale orders for the product before commencing production.", + "length": 99 + }, + { + "text": "At full capacity it can hold enough power to charge an iPad twice and can charge a phone in just four hours.", + "length": 108 + }, + { + "text": "Ornate: This mock Bonsai tree is in fact a solar-powered charger for everyday electrical devices called the Electree+ .", + "length": 119 + }, + { + "text": "Created by French designer Vivien Muller, the Electree+ conceals a USB connection underneath its wood-topped base unit.", + "length": 119 + }, + { + "text": "With energy costs soaring, the gadget has been designed to help technology-geeks charge their phone in a green-friendly manner.", + "length": 127 + }, + { + "text": "But while the gadget may cut down on your energy bills, it might take you a while to recoup the savings, as it will costs £283.", + "length": 128 + }, + { + "text": "The Electree+ doubles as both an ornament and a charger for everyday electrical devices - without the need for any delicate pruning.", + "length": 132 + }, + { + "text": "The designer said he was inspired to make the product after observing real trees, noticing that their leaves acted as natural solar panels.", + "length": 139 + }, + { + "text": "Energy efficient: A battery is concealed in the base of the device that holds enough energy to charge two iPads and can charge a phone in four hours .", + "length": 150 + }, + { + "text": "This little mock bonsai tree might look like the kind of thing that just sits prettily on top of a coffee table - but it is actually a rather ornate phone charger.", + "length": 163 + }, + { + "text": "The device has 
27 miniature silicon solar panels - or 'leaves', as they are called - that can be arranged in any way the owner wants to create their own bespoke tree.", + "length": 166 + }, + { + "text": "Inspired by nature: The Electree's designer Vivien Muller, left, said he was inspired to make the gadget after observing real trees and noticing how their leaves acted as natural solar panels .", + "length": 193 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.40351104736328125 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:35.51400525Z", + "first_section_created": "2025-12-23T09:32:35.514485969Z", + "last_section_published": "2025-12-23T09:32:35.51474088Z", + "all_results_received": "2025-12-23T09:32:35.579343105Z", + "output_generated": "2025-12-23T09:32:35.579573114Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:35.514485969Z", + "publish_time": "2025-12-23T09:32:35.51474088Z", + "first_worker_start": "2025-12-23T09:32:35.515330704Z", + "last_worker_end": "2025-12-23T09:32:35.578414Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:35.515286202Z", + "start_time": "2025-12-23T09:32:35.515330704Z", + "end_time": "2025-12-23T09:32:35.515361905Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:35.515524Z", + "start_time": "2025-12-23T09:32:35.515668Z", + "end_time": "2025-12-23T09:32:35.578414Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:35.515335204Z", + "start_time": "2025-12-23T09:32:35.515391606Z", + "end_time": "2025-12-23T09:32:35.515446208Z", + "queue_wait_time_ms": 
0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:35.515282602Z", + "start_time": "2025-12-23T09:32:35.515349204Z", + "end_time": "2025-12-23T09:32:35.515422707Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1926, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/00139ad128fc573dac7dfe4ce4921a07a408be60.json b/data/output/00139ad128fc573dac7dfe4ce4921a07a408be60.json new file mode 100644 index 0000000..1499941 --- /dev/null +++ b/data/output/00139ad128fc573dac7dfe4ce4921a07a408be60.json @@ -0,0 +1,392 @@ +{ + "file_name": "00139ad128fc573dac7dfe4ce4921a07a408be60.txt", + "total_words": 1296, + "top_n_words": [ + { + "word": "the", + "count": 98 + }, + { + "word": "of", + "count": 38 + }, + { + "word": "and", + "count": 27 + }, + 
{ + "word": "was", + "count": 27 + }, + { + "word": "in", + "count": 26 + }, + { + "word": "monis", + "count": 26 + }, + { + "word": "to", + "count": 24 + }, + { + "word": "he", + "count": 23 + }, + { + "word": "a", + "count": 18 + }, + { + "word": "his", + "count": 18 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "All of these are believed to have been rejected.", + "length": 48 + }, + { + "text": "All of these are believed to have been rejected .", + "length": 49 + }, + { + "text": "'An enemy would bluntly claim that they are the enemy.", + "length": 54 + }, + { + "text": "They were more worried about the backpack than the gun.", + "length": 55 + }, + { + "text": "'The core of an Islamic society is justice, social justice.", + "length": 59 + }, + { + "text": "The way Monis spoke was described as 'slow' and 'low-pitched'.", + "length": 62 + }, + { + "text": "Monis believed moderate Muslims would spell the death of extremists.", + "length": 68 + }, + { + "text": "'The team who went through the doors still thought there were bombs (inside).", + "length": 77 + }, + { + "text": "Society should behave in an Islamic manner and there should be justice,' he said.", + "length": 81 + }, + { + "text": "Monis went on to say people should fear the ones who declared 'Islam is in jeopardy'.", + "length": 85 + }, + { + "text": "Mr Daoud said the gunman was a 'very good listener' who 'wasn't stupid, he's intelligent'.", + "length": 90 + }, + { + "text": "At times, Monis became so passionate he was seen thumping his fists to drive home his message.", + "length": 94 + }, + { + "text": "' 'They were confronted by a man with what they think is a bomb strapped to him and firing a gun at them as they’re going in.", + "length": 127 + }, + { + "text": "Canvassing what he thought was the virtues of Sharia law, he pushed the case for Australia to take on more of the Islamic culture.", + "length": 130 + }, + { + "text": "His offer 
was one of many put forward by other Muslim community members, including the Grand Mufti, Ibrahim Abu Mohamed (pictured).", + "length": 131 + }, + { + "text": "Six State Protection Group officers believed the gunman had a bomb strapped to his back as they stormed the Lindt cafe on December 16 .", + "length": 135 + }, + { + "text": "The Bomb Squad then entered the building and discovered that despite wires hanging out of Monis' backpack - it did not contain explosives.", + "length": 138 + }, + { + "text": "Showing his level of paranoia, the terrorist also warned one of the biggest threats against fundamentalist Muslims lay within their community.", + "length": 142 + }, + { + "text": "Monis and two of his hostages, Tori Johnson and Katrina Dawson, were killed in the bloody conclusion to the siege in the early hours of December 16 .", + "length": 149 + }, + { + "text": "was good at networking from both sides of the Muslim community - Sunni and Shia, saying 'He knew a lot about everybody, they knew nothing about him'.", + "length": 149 + }, + { + "text": "' Once Monis was killed, the next concern for the officers was getting the remainder of the female hostages out of the building as safely as possible.", + "length": 150 + }, + { + "text": "Despite this show of support, Monis will be buried in an undisclosed location in Melbourne after the Sydney Muslim community reportedly refused to bury him.", + "length": 156 + }, + { + "text": "It follows claims from the Muslim community no Muslim funeral home would accept him as his body remained in the morgue unclaimed, days after the siege had ended.", + "length": 161 + }, + { + "text": "His offer of help police negotiate with the 50-year-old was one of many put forward by other Muslim community members, including the Grand Mufti, Ibrahim Abu Mohamed.", + "length": 166 + }, + { + "text": "Instead, he claimed Monis (pictured) was only seeking attention and was motivated by his problems with the government and his limited ability 
to contact his children .", + "length": 168 + }, + { + "text": "It was only then that the officers had time to think about themselves and fled the cafe as the contents of the backpack attached to the gunman's back were still unknown.", + "length": 169 + }, + { + "text": "In the same year he fell off the radar of intelligence agencies, Monis warned about keeping an eye on 'friends' who assimilated into the community before delivering terror.", + "length": 172 + }, + { + "text": "Meanwhile, tapes of the man responsible for the Martin Place terror plot delivering religious rants, saying 'Society should behave in an Islamic manner', have been unearthed.", + "length": 174 + }, + { + "text": "An Islamic State propaganda magazine lavished Monis' with praise for the siege, labelling the hostage crisis a 'daring raid' and calling for more lone wolf attacks in Australia .", + "length": 178 + }, + { + "text": "He believed governments who were 'not aware that there [was] criminal activity happening in your country' then officials should resign from their positions as they were 'incompetent'.", + "length": 183 + }, + { + "text": "Meanwhile, Monis' body has finally been claimed by some of his anonymous acquaintances, and will be buried somewhere in Melbourne within the next few days, according to the Telegraph.", + "length": 183 + }, + { + "text": "The advocate said Monis worshiped at different mosques around Sydney, which included Granville's Nabi Akram Islamic Centre, in Sydney's west - a location where these religious rants were given.", + "length": 195 + }, + { + "text": "Mamdouh Habib, who was assisted by Monis in 2007 when he sought Labor's seat in Sydney's western suburb of Auburn, told Fairfax he knew him 'very well' and insisted that Monis was 'not violent'.", + "length": 195 + }, + { + "text": "The six State Protection Group officers were ordered to enter the Martin Place building immediately after cafe manager Tori Johnson was shot in the head by the gunman, reports 
the Daily Telegraph.", + "length": 198 + }, + { + "text": "The footage, translated by a Farsi translator for The Daily Telegraph, slammed overseas government who experienced the most violent of crimes, saying: 'your intelligence service is not working properly'.", + "length": 204 + }, + { + "text": "The highly-trained police officers were more concerned about the contents of the Man Haron Monis' backpack than his firearm when they stormed the Lindt cafe in the chilling final seconds of the Sydney siege.", + "length": 207 + }, + { + "text": "It comes after former failed politician and Guantánamo Bay detainee, who previously employed gunman Monis, claimed he could have convinced the 50-year-old to end the Sydney siege without resorting to violence.", + "length": 210 + }, + { + "text": "The wrongly accused former Guantánamo Bay detainee, who was tortured after being arrested in Pakistan under the false pretences he had knowledge of the September 11 attacks, claimed Monis was only seeking attention.", + "length": 216 + }, + { + "text": "Mr Habib offered to help negotiate with the gunman during the siege, and maintains that Monis, while 'sick and disturbed' was motivated by his problems with the government and his limited ability to contact his children.", + "length": 220 + }, + { + "text": "An ignorant friend claims that they are friends and they approach you, you would socialise with them and associate with them, but one day like a bear, they lift a big stone and they hit it to the face of that person and kills him,' he said.", + "length": 240 + }, + { + "text": "The crucial movements of the SPG officers have been praised as 'text book entry' by those within the police force, as the specially-trained squad displayed a calm and composed nature despite their suspicions that Monis was strapped with an explosive.", + "length": 250 + }, + { + "text": "The December siege, which shocked Australia and sparked an outpouring of grief from across the nation, ended with the 
deaths of cafe manager Tori Johnson and mother-of-three Katrina Dawson - who worked as a barrister at nearby law chambers Eight Selborne.", + "length": 255 + }, + { + "text": "Officers were ordered to enter the Martin Place cafe after Monis (left) shot cafe manager Tori Johnson (right) 'They (the SPG) knew as soon the hostage was shot … that they would be going through that door,' a senior police source told The Daily Telegraph.", + "length": 259 + }, + { + "text": "He insistently told Fairfax he believed Monis had no intention of harming any of the 18 people he took hostage at the Lindt Café in Martin Place on December 15, nor that his attack was associated with motivations linked to terrorist organisation the Islamic State.", + "length": 265 + }, + { + "text": "In 2009, the self-styled sheik - five years before he carried out the 16-hour attack where he held 18 people hostage in Martin Place's Lindt Cafe in Sydney's CBD last month - imparted his extremist views to a crowd inside a prayer hall in the city's western suburbs.", + "length": 266 + }, + { + "text": "Adding to the terrifying picture that has been pieced together of Monis over the past couple of weeks following the siege, friends of the gunman said he was an enigma who set up political party Hezbollah Australia and funded a Campsie warehouse to be turned into a prayer hall.", + "length": 277 + }, + { + "text": "Mamdouh Habib (pictured), who was assisted by Monis in 2007 when he sought Labor's seat in Sydney's western suburb of Auburn, said he knew Man Haron Monis 'very well' and insisted that the Sydney siege gunman was 'not violent' Refugee advocate Jamal Daoud told News Corp he did not know where Monis' money was sourced and the terrorist was 'mysterious'.", + "length": 354 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.542345866560936 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:36.015191613Z", + "first_section_created": 
"2025-12-23T09:32:36.015548328Z", + "last_section_published": "2025-12-23T09:32:36.016013847Z", + "all_results_received": "2025-12-23T09:32:36.109185932Z", + "output_generated": "2025-12-23T09:32:36.109406841Z", + "total_processing_time_ms": 94, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 93, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:36.015548328Z", + "publish_time": "2025-12-23T09:32:36.015775837Z", + "first_worker_start": "2025-12-23T09:32:36.016353761Z", + "last_worker_end": "2025-12-23T09:32:36.086133Z", + "total_journey_time_ms": 70, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:36.016426264Z", + "start_time": "2025-12-23T09:32:36.016498266Z", + "end_time": "2025-12-23T09:32:36.01659807Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:36.016603Z", + "start_time": "2025-12-23T09:32:36.016766Z", + "end_time": "2025-12-23T09:32:36.086133Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 69 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:36.016358561Z", + "start_time": "2025-12-23T09:32:36.016443964Z", + "end_time": "2025-12-23T09:32:36.01659547Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:36.016284758Z", + "start_time": "2025-12-23T09:32:36.016353761Z", + "end_time": "2025-12-23T09:32:36.016424063Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:36.01585404Z", + "publish_time": "2025-12-23T09:32:36.016013847Z", + "first_worker_start": "2025-12-23T09:32:36.016397362Z", + "last_worker_end": "2025-12-23T09:32:36.108333Z", + "total_journey_time_ms": 92, + "worker_timings": { + "sentencesort": { + 
"worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:36.016502167Z", + "start_time": "2025-12-23T09:32:36.016533968Z", + "end_time": "2025-12-23T09:32:36.01659237Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:36.016772Z", + "start_time": "2025-12-23T09:32:36.017033Z", + "end_time": "2025-12-23T09:32:36.108333Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 91 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:36.016563869Z", + "start_time": "2025-12-23T09:32:36.016614271Z", + "end_time": "2025-12-23T09:32:36.016683874Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:36.01635066Z", + "start_time": "2025-12-23T09:32:36.016397362Z", + "end_time": "2025-12-23T09:32:36.016439564Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 160, + "min_processing_ms": 69, + "max_processing_ms": 91, + "avg_processing_ms": 80, + "median_processing_ms": 91, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3844, + "slowest_section_id": 1, + "slowest_section_time_ms": 92 + } +} diff --git a/data/output/0013a12665155c61acfe30384ca6bf1f1254c5fd.json b/data/output/0013a12665155c61acfe30384ca6bf1f1254c5fd.json new file mode 100644 index 0000000..d01156e --- /dev/null +++ b/data/output/0013a12665155c61acfe30384ca6bf1f1254c5fd.json @@ -0,0 +1,294 @@ +{ + "file_name": "0013a12665155c61acfe30384ca6bf1f1254c5fd.txt", + "total_words": 628, + "top_n_words": [ + { + "word": "a", + "count": 27 + }, + { + "word": "the", + "count": 21 + }, + { + "word": "in", + "count": 20 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "on", + "count": 12 + }, + { + "word": "gambling", + "count": 10 + }, + { + "word": "after", + "count": 9 + }, + { + "word": "he", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "' Scroll down for video .", + "length": 25 + }, + { + "text": "'My whole personality changed.", + "length": 30 + }, + { + "text": "He said 'FOBTs are so dangerous.", + "length": 32 + }, + { + "text": "5 billion-a-year in profit for bookmakers.", + "length": 42 + }, + { + "text": "He had also just moved in with a new girlfriend.", + "length": 48 + }, + { + "text": "After that, all I thought about all day was gambling.", + "length": 53 + }, + { + "text": "Simon said: 'These fixed odds betting terminals destroy you.", + "length": 60 + }, + { + "text": "'Bookies have taken advantage of a loophole in the tax system.", + "length": 62 + }, + { + "text": "'Crack cocaine of gambling': Mr Perfitt said 'FOBTs are so dangerous.", + "length": 69 + }, + { + "text": "I became very introverted, made excuses not to see family and friends.", + "length": 70 + }, + { + "text": "You can lose hundreds of pounds in a matter of minutes with a few spins.", + "length": 72 + }, + { 
+ "text": "Going there probably saved my life and I haven't gambled for three years.", + "length": 73 + }, + { + "text": "'Within 10 years I had lost £200,000, a relationship and my home as well.", + "length": 74 + }, + { + "text": "A recent poll found that three in four voters wanted a ban on the machines.", + "length": 75 + }, + { + "text": "'I worked to go on these machines and could spend up to 12 hours a day in there.", + "length": 80 + }, + { + "text": "You can lose hundreds of pounds in a matter of minutes with a few spins' File picture .", + "length": 87 + }, + { + "text": "'This allows them to set a maximum stake of £100, as opposed to the £2 maximum on machines in arcades.", + "length": 104 + }, + { + "text": "The charity is also behind UK National Gambling Addiction Awareness Week taking place between December 1 and 7.", + "length": 111 + }, + { + "text": "He said: 'They helped me with my cognitive behaviour, helping to try and sort out my distorted thought patterns.", + "length": 112 + }, + { + "text": "A former gambling addict has revealed how he blew £200,000 in 10 years after becoming hooked on gambling machines.", + "length": 115 + }, + { + "text": "I became addicted instantly after a friend who played the machines asked me to pop into a bookies one day and have a go.", + "length": 120 + }, + { + "text": "Simon Perfitt, 58, went from a £50,000 job to living on benefits after he started playing fixed odds betting terminals .", + "length": 121 + }, + { + "text": "Surprisingly, he didn't start betting until he was 45, but 10 years later was a broken man both financially and emotionally.", + "length": 124 + }, + { + "text": "I used to get up early and go in to the bookies before I went to work, at lunchtime and would go straight into one after work.", + "length": 126 + }, + { + "text": "The businessman had led a lavish lifestyle back in 2001, enjoying well-paid jobs in e-commerce which allowed him to own a Porsche.", + "length": 130 + }, + { + "text": 
"His charity, which launches in November, will campaign for a reduction in the maximum stake on the machines, currently set at £100 every 20 seconds.", + "length": 149 + }, + { + "text": "Mr Perfitt has now set up a charity called Rethink Gambling to raise awareness of the dangers of gambling and to campaign against the FOBT terminals.", + "length": 149 + }, + { + "text": "Regular players are ‘like zombies’, the managers said, giving examples of how customers could attack staff and vandalise shops when they lose money.", + "length": 152 + }, + { + "text": "They account for around half of bookmakers’ profits but betting shop managers have anonymously told the Mail of the toll they take on customers and staff.", + "length": 156 + }, + { + "text": "Simon Perfitt, 58, went from a £50,000 job to living on benefits after he started playing fixed odds betting terminals (FOBT) - blowing up to £3,000 per day.", + "length": 159 + }, + { + "text": "' The machines, of which there are 33,000 in betting shops across the country, have been blamed for a rise in problem gambling and have also been linked to money laundering.", + "length": 173 + }, + { + "text": "Bookies have been able to argue that feeding money into a FOBT is like betting on a horse race since the 'event' you are gambling on is happening on a computer server elsewhere.", + "length": 177 + }, + { + "text": "Mr Perfitt finally sought help after moving from Cambridge to live in Dudley in 2011 after getting in touch with the Gordon Moody Association, which helps gamblers break the habit.", + "length": 180 + }, + { + "text": "But Mr Perfitt, from Dudley, West Midlands lost it all after becoming addicted to playing roulette on the controversial machines, dubbed the crack cocaine of gambling and which rake in £1.", + "length": 189 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.408788800239563 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:36.516803294Z", + 
"first_section_created": "2025-12-23T09:32:36.51718281Z", + "last_section_published": "2025-12-23T09:32:36.517396318Z", + "all_results_received": "2025-12-23T09:32:36.581718032Z", + "output_generated": "2025-12-23T09:32:36.58192344Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:36.51718281Z", + "publish_time": "2025-12-23T09:32:36.517396318Z", + "first_worker_start": "2025-12-23T09:32:36.51792784Z", + "last_worker_end": "2025-12-23T09:32:36.58069Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:36.517900639Z", + "start_time": "2025-12-23T09:32:36.517972542Z", + "end_time": "2025-12-23T09:32:36.518048245Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:36.518173Z", + "start_time": "2025-12-23T09:32:36.518319Z", + "end_time": "2025-12-23T09:32:36.58069Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:36.517884438Z", + "start_time": "2025-12-23T09:32:36.517953241Z", + "end_time": "2025-12-23T09:32:36.518076246Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:36.517858037Z", + "start_time": "2025-12-23T09:32:36.51792784Z", + "end_time": "2025-12-23T09:32:36.517990442Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + 
"sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3530, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/0013a4baaf65a56c857088f2e236bfbc080c88dc.json b/data/output/0013a4baaf65a56c857088f2e236bfbc080c88dc.json new file mode 100644 index 0000000..6e02489 --- /dev/null +++ b/data/output/0013a4baaf65a56c857088f2e236bfbc080c88dc.json @@ -0,0 +1,676 @@ +{ + "file_name": "0013a4baaf65a56c857088f2e236bfbc080c88dc.txt", + "total_words": 1248, + "top_n_words": [ + { + "word": "the", + "count": 61 + }, + { + "word": "to", + "count": 39 + }, + { + "word": "and", + "count": 35 + }, + { + "word": "her", + "count": 31 + }, + { + "word": "she", + "count": 29 + }, + { + "word": "in", + "count": 27 + }, + { + "word": "ortega", + "count": 26 + }, + { + "word": "a", + "count": 22 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "that", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "At .", + "length": 4 + }, + { + "text": "He .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "food.", + "length": 5 + }, + { + "text": "She .", + "length": 5 + 
}, + { + "text": "When .", + "length": 6 + }, + { + "text": "' In .", + "length": 6 + }, + { + "text": "house.", + "length": 6 + }, + { + "text": "While .", + "length": 7 + }, + { + "text": "Ortega, .", + "length": 9 + }, + { + "text": "'Nessie .", + "length": 9 + }, + { + "text": "Republic.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "mentally fit.", + "length": 13 + }, + { + "text": "Quoted by the .", + "length": 15 + }, + { + "text": "tragedy on her.", + "length": 15 + }, + { + "text": "being cared for.", + "length": 16 + }, + { + "text": "'Someone else did.", + "length": 18 + }, + { + "text": "woke from the coma.", + "length": 19 + }, + { + "text": "to do the housework.", + "length": 20 + }, + { + "text": "beautiful and tough.", + "length": 20 + }, + { + "text": "She saves us every day.", + "length": 23 + }, + { + "text": "to proceed to her trial.", + "length": 24 + }, + { + "text": "We are filled with many .", + "length": 25 + }, + { + "text": "called defense witness Dr.", + "length": 26 + }, + { + "text": "04:13 EST, 13 August 2013 .", + "length": 27 + }, + { + "text": "16:27 EST, 13 August 2013 .", + "length": 27 + }, + { + "text": "In a conversation with her .", + "length": 28 + }, + { + "text": "Quoted by NY Daily News, she .", + "length": 31 + }, + { + "text": "But the Krims' extended family .", + "length": 32 + }, + { + "text": "Ortega became extremely animated .", + "length": 34 + }, + { + "text": "Last month, Ortega was examined by .", + "length": 36 + }, + { + "text": "In April, Ortega, was ruled fit to .", + "length": 36 + }, + { + "text": "dead in a pool of blood in the bath.", + "length": 36 + }, + { + "text": "She has pleaded not guilty to murder.", + "length": 37 + }, + { + "text": "'She said something like, \"I'm paid .", + "length": 37 + }, + { + "text": "After learning from the doorman that .", + "length": 38 + }, + { + "text": "Ortega is due to stand 
trial for the .", + "length": 38 + }, + { + "text": "Ortega, Lucia and Leo had been due to .", + "length": 39 + }, + { + "text": "Her lawyer is challenging those results.", + "length": 40 + }, + { + "text": "with her family, in reasoned statements.", + "length": 40 + }, + { + "text": "Jill Reilly, Helen Pow and Helen Collis .", + "length": 41 + }, + { + "text": "She has pleaded not guilty to the charges.", + "length": 42 + }, + { + "text": "the upper West Side, New York, last October.", + "length": 44 + }, + { + "text": "can’t wait to welcome her new baby brother.", + "length": 45 + }, + { + "text": "remembered details of the planned renovations.", + "length": 46 + }, + { + "text": "murder and two counts of second-degree murder.", + "length": 46 + }, + { + "text": "the specific nature of what [the law] demands.", + "length": 46 + }, + { + "text": "Karen Rosenbaum, who testified that Ortega’s .", + "length": 48 + }, + { + "text": "And she's grown so much over the past two months.", + "length": 49 + }, + { + "text": "She sounded coherent while discussing everyday matters.", + "length": 55 + }, + { + "text": "Doctors offered diverging views of Ortega's mental state.", + "length": 57 + }, + { + "text": "We are very grateful to you all for your amazing support.", + "length": 57 + }, + { + "text": "that if she didn't improve, they might need to replace her.", + "length": 59 + }, + { + "text": "apartment and came across the grisly scene in the bathroom.", + "length": 59 + }, + { + "text": "to watch the children, not clean up and do housework\",' a .", + "length": 59 + }, + { + "text": "The Krims were also reportedly worried about Ortega's job .", + "length": 59 + }, + { + "text": "concerned when they failed to turn up and returned to the home.", + "length": 63 + }, + { + "text": "said that they are drawing strength from their remaining child.", + "length": 63 + }, + { + "text": "Authorities say Ortega also cut her throat in a suicide attempt.", + "length": 64 + }, + { + 
"text": "travel on holidays and even visited her family in the Dominican .", + "length": 65 + }, + { + "text": "been done, because otherwise, 'the tiles get dirty while painting'.", + "length": 67 + }, + { + "text": "meet Mrs Krim and Nessie at a dance studio, but the mother became .", + "length": 67 + }, + { + "text": "when workers were scheduled to arrive, the news service said, and .", + "length": 67 + }, + { + "text": "two mental health professionals, both of whom found that she could .", + "length": 68 + }, + { + "text": "stand trial and will now face a jury for two counts of first-degree .", + "length": 69 + }, + { + "text": "a hearing on Monday, the court heard a recording of Ortega chatting .", + "length": 69 + }, + { + "text": "prosecutors said this was evidence of her sanity, Ms Leer-Greenberg .", + "length": 69 + }, + { + "text": "also expressed concern about the family planning to go out at a time .", + "length": 70 + }, + { + "text": "emotions as we look to the future, but the most important one is hope.", + "length": 70 + }, + { + "text": "news service, Rosenbaum said: 'Somebody can function in a day-to-day .", + "length": 70 + }, + { + "text": "performance in the weeks leading up to the killings and had told her .", + "length": 70 + }, + { + "text": "to paint Mrs Krim in a bad light, shifting part of the blame for the .", + "length": 70 + }, + { + "text": "January, Mr Krim, an executive at America's CNBC television station, .", + "length": 70 + }, + { + "text": "had numerous disagreements with mother Marina about how the kids were .", + "length": 71 + }, + { + "text": "seemingly lucid thought process during the calls was not proof she is .", + "length": 71 + }, + { + "text": "questioned by police after awaking from the coma, the nanny attempted .", + "length": 71 + }, + { + "text": "17-year-old son, Jesus, she asked about renovations being done to the .", + "length": 71 + }, + { + "text": "dispute they treated her poorly, explaining that they paid 
for her to .", + "length": 71 + }, + { + "text": "Ortega's suicide bid failed and she was put in a medically-induced coma.", + "length": 72 + }, + { + "text": "the nanny had not left the building, Mrs Krim frantically searched the .", + "length": 72 + }, + { + "text": "nanny attempted to slit her own throat on October 25 after Marina Krim .", + "length": 72 + }, + { + "text": "In June, the Krim's revealed they are expecting a baby boy in October, .", + "length": 72 + }, + { + "text": "said: 'Marina and I couldn't be more proud of her - she is very smart, .", + "length": 72 + }, + { + "text": "showed understanding about why tiles would be laid after paint work has .", + "length": 73 + }, + { + "text": "assist in her defense in a meaningful way and that she was mentally fit .", + "length": 73 + }, + { + "text": "who had told neighbors and family that she was short of money, revealed .", + "length": 73 + }, + { + "text": "came home to her Upper West Side apartment to find her daughter and son .", + "length": 73 + }, + { + "text": "2, who were found stabbed to death inside their parents’ apartment on .", + "length": 73 + }, + { + "text": "first-degree murder of the two children, Lucia and Leo Krim, ages 6 and .", + "length": 73 + }, + { + "text": "she resented her employers because they kept telling her what to do and .", + "length": 73 + }, + { + "text": "Her lawyer is arguing that she is not mentally well enough for the trial .", + "length": 74 + }, + { + "text": "law-enforcement source said of Ortega’s statements to police after she .", + "length": 74 + }, + { + "text": "sense on some level but still not be competent to stand trial because of .", + "length": 74 + }, + { + "text": "when she discussed the incident with police, telling officers that  she .", + "length": 74 + }, + { + "text": "' Ortega did not say who else could be responsible for murdering the children.", + "length": 78 + }, + { + "text": "'I didn't do that,' the nanny insisted in jailhouse interview 
with New York Daily News.", + "length": 87 + }, + { + "text": "Fit for trial: Nanny Yoselyn Ortega has pleaded not guilty to killing Lucia and Leo Krim .", + "length": 90 + }, + { + "text": "Victims: Only Nessie, whose picture has been pixelated, escaped harm as she was with her mother .", + "length": 97 + }, + { + "text": "' 'She de-compensates under stress,' Rosenbaum said on cross-examination, the NY Daily News reported.", + "length": 101 + }, + { + "text": "Loss: Two-year-old Leo and his big sister Lucia, six, were found in a pool of blood in the family's home .", + "length": 106 + }, + { + "text": "Reports claimed Marina didn't think Ortega was interacting with the kids enough and was giving them junk .", + "length": 106 + }, + { + "text": "A New York City nanny charged with stabbing two small children to death has again been found fit for trial.", + "length": 107 + }, + { + "text": "State Supreme Court Justice Gregory Carro also heard recorded phone calls Ortega made from jail to relatives.", + "length": 109 + }, + { + "text": "A Manhattan judge reaffirmed his finding in Yoselyn Ortega's case on Tuesday after he was asked to reconsider.", + "length": 110 + }, + { + "text": "Defense lawyer Valerie Van Leer-Greenberg has said Ortega's delusions and injuries mean she cannot understand the case.", + "length": 119 + }, + { + "text": "Fit state: Yesterday's hearing comes after two psychologists ruled last month that Ortega was mentally fit to stand trial.", + "length": 122 + }, + { + "text": "In June, Ortega claimed someone else killed the children she's accused of stabbing all over their bodies in the Krims home.", + "length": 123 + }, + { + "text": "After the killings, Ortega told doctors she didn't remember the attacks and that she heard voices telling her to kill people.", + "length": 125 + }, + { + "text": "Horrific: Yoselyn Ortega, pictured, was discovered with the two children and a knife as she attempted to slit her own throat .", + "length": 126 + }, + { + 
"text": "'Hello everyone, We are very happy to let you know that Marina is expecting a baby in the fall,' read the message on the Lulu \u0026 Leo Fund.", + "length": 137 + }, + { + "text": "Ortega allegedly killed six-year-old Lucia Krim and two-year-old Leo Krim in October while their mother was out picking up her third child from a swimming lesson.", + "length": 162 + }, + { + "text": "Happy news: Marina and Kevin Krim, whose son and daughter were allegedly stabbed to death by their nanny last year, have revealed that they are expecting a baby boy this fall .", + "length": 176 + }, + { + "text": "Plea: Yoselyn Ortega, pictured centre, has pleaded not guilty to killing six-year-old Lucia Krim (right) and her two-year-old brother Leo while they were in her care at their Manhattan home .", + "length": 191 + }, + { + "text": "Family: Lulu is pictured with her father Kevin Krim and mother Marina Krim, who found her children stabbed in the bathtub and their nanny with a slit throat and wrists on the bathroom floor .", + "length": 191 + }, + { + "text": "The prosecution presented audio recording of Yoselyn Ortega, 50, speaking coherently with her family on the phone from jail, giving advice and showing concern for building works - evidence they say that she is not mentally disabled.", + "length": 232 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.573839545249939 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:37.018231867Z", + "first_section_created": "2025-12-23T09:32:37.018591782Z", + "last_section_published": "2025-12-23T09:32:37.019125304Z", + "all_results_received": "2025-12-23T09:32:37.139009375Z", + "output_generated": "2025-12-23T09:32:37.139254785Z", + "total_processing_time_ms": 121, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 119, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:37.018591782Z", + 
"publish_time": "2025-12-23T09:32:37.018932396Z", + "first_worker_start": "2025-12-23T09:32:37.019370014Z", + "last_worker_end": "2025-12-23T09:32:37.138098Z", + "total_journey_time_ms": 119, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:37.019436516Z", + "start_time": "2025-12-23T09:32:37.019493619Z", + "end_time": "2025-12-23T09:32:37.019590723Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:37.019736Z", + "start_time": "2025-12-23T09:32:37.019882Z", + "end_time": "2025-12-23T09:32:37.138098Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 118 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:37.019307111Z", + "start_time": "2025-12-23T09:32:37.019370014Z", + "end_time": "2025-12-23T09:32:37.019464318Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:37.019466718Z", + "start_time": "2025-12-23T09:32:37.01951782Z", + "end_time": "2025-12-23T09:32:37.019607123Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:37.019014599Z", + "publish_time": "2025-12-23T09:32:37.019125304Z", + "first_worker_start": "2025-12-23T09:32:37.019506019Z", + "last_worker_end": "2025-12-23T09:32:37.099798Z", + "total_journey_time_ms": 80, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:37.019570622Z", + "start_time": "2025-12-23T09:32:37.019612024Z", + "end_time": "2025-12-23T09:32:37.019663726Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:37.01992Z", + "start_time": "2025-12-23T09:32:37.020059Z", + "end_time": "2025-12-23T09:32:37.099798Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 79 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:37.019556121Z", + "start_time": "2025-12-23T09:32:37.01977643Z", + "end_time": "2025-12-23T09:32:37.019856433Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:37.019393115Z", + "start_time": "2025-12-23T09:32:37.019506019Z", + "end_time": "2025-12-23T09:32:37.01953082Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 197, + "min_processing_ms": 79, + "max_processing_ms": 118, + "avg_processing_ms": 98, + "median_processing_ms": 118, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3600, + "slowest_section_id": 0, + "slowest_section_time_ms": 119 + } +} diff --git a/data/output/0013bbfc8c09acf18b7dfadc3107e7148033727c.json b/data/output/0013bbfc8c09acf18b7dfadc3107e7148033727c.json new file mode 100644 index 0000000..c9a6aa5 --- /dev/null +++ b/data/output/0013bbfc8c09acf18b7dfadc3107e7148033727c.json @@ 
-0,0 +1,270 @@ +{ + "file_name": "0013bbfc8c09acf18b7dfadc3107e7148033727c.txt", + "total_words": 463, + "top_n_words": [ + { + "word": "the", + "count": 25 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "said", + "count": 11 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "s", + "count": 10 + }, + { + "word": "women", + "count": 10 + }, + { + "word": "cardinal", + "count": 9 + }, + { + "word": "a", + "count": 8 + }, + { + "word": "children", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "He .", + "length": 4 + }, + { + "text": "society?", + "length": 8 + }, + { + "text": "We don't live in the 1940s.", + "length": 27 + }, + { + "text": "Is not that the suicide of .", + "length": 28 + }, + { + "text": "In a wide-ranging attack on Merkel's .", + "length": 38 + }, + { + "text": "but have a perfect legislation for abortion.", + "length": 44 + }, + { + "text": "'The Church can't drag women back into the kitchen.", + "length": 51 + }, + { + "text": "' Germany has the lowest birth rate in Europe at just 1.", + "length": 56 + }, + { + "text": "Cardinal Meisner's comments have drawn criticism from German women.", + "length": 67 + }, + { + "text": "compared Merkel's government's family policy to that in East Germany.", + "length": 69 + }, + { + "text": "said more value should be placed on the role of mother and father and .", + "length": 71 + }, + { + "text": "policies, the cardinal told German newspaper Stuttgarter Zeitung: 'We are a dying people .", + "length": 90 + }, + { + "text": "The cardinal said greater value should be placed on role of mother and father in German society .", + "length": 97 + }, + { + "text": "But Cardinal Meisner said the policy was taking 'away the youth and future' from the two countries.", + "length": 99 + }, + { + "text": "' Germany has organised a mass apprentice scheme for unemployed young people from Spain and Portugal.", + "length": 101 + }, + { + "text": "The German 
government spends millions of euros a year trying to encourage women to have more children.", + "length": 102 + }, + { + "text": "Berlin has increased monthly subsidies to families in recent years but it has had little effect on the birth rate.", + "length": 114 + }, + { + "text": "He said under Communist rule, women who chose not to work in order to bring up children were classed as 'demented'.", + "length": 115 + }, + { + "text": "Cardinal Meisner said there was an urgent need to 'create a climate' in Germany that encourages women to have more children.", + "length": 124 + }, + { + "text": "Annegret Laakmann, president of the Catholic group Women's Dignity, told the Daily Telegraph: 'Age doesn't always bring wisdom.", + "length": 127 + }, + { + "text": "Cardinal Meisner, 79, said: 'Where are women really encouraged publicly to stay at home and bring three, four children into the world?", + "length": 134 + }, + { + "text": "Women should stay at home and have three or four children to help solve Germany's predicted population crisis, according to a cardinal.", + "length": 135 + }, + { + "text": "36 children per woman and forecasters predict the country's 83million population will shrink to 70million by 2050 if the birth date does not increase.", + "length": 150 + }, + { + "text": "Cardinal Joachim Meisner said women should have more children to boost Germany's dwindling population and said German Chancellor Angela Merkel is relying on immigration .", + "length": 170 + }, + { + "text": "He said women should be encouraged to produce more children to increase the German population and said Merkel was using immigration to solve Germany's demographic problems.", + "length": 172 + }, + { + "text": "Cardinal Joachim Meisner, the Roman Catholic Archbishop of Cologne, has criticised German Chancellor Angela Merkel's policy of trying to attract immigrants to work in the country.", + "length": 179 + }, + { + "text": "'This is what we should do and not - as Mrs Merkel is doing 
now - only present immigration as a solution to our demographic problems,' he said in an interview with Stuttgarter Zeitung.", + "length": 185 + }, + { + "text": "He argued that the 5,000 people on the scheme should be trained and then sent back to their countries to help rebuild after the devastating effects of the banking crisis and subsequent austerity.", + "length": 195 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4563063383102417 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:37.519965553Z", + "first_section_created": "2025-12-23T09:32:37.520297867Z", + "last_section_published": "2025-12-23T09:32:37.520482174Z", + "all_results_received": "2025-12-23T09:32:37.577062473Z", + "output_generated": "2025-12-23T09:32:37.577208579Z", + "total_processing_time_ms": 57, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 56, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:37.520297867Z", + "publish_time": "2025-12-23T09:32:37.520482174Z", + "first_worker_start": "2025-12-23T09:32:37.521133901Z", + "last_worker_end": "2025-12-23T09:32:37.576204Z", + "total_journey_time_ms": 55, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:37.521129601Z", + "start_time": "2025-12-23T09:32:37.521185903Z", + "end_time": "2025-12-23T09:32:37.521232405Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:37.521263Z", + "start_time": "2025-12-23T09:32:37.521387Z", + "end_time": "2025-12-23T09:32:37.576204Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 54 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:37.521073098Z", + "start_time": "2025-12-23T09:32:37.521133901Z", + "end_time": "2025-12-23T09:32:37.521204904Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:37.521079198Z", + "start_time": "2025-12-23T09:32:37.521154002Z", + "end_time": "2025-12-23T09:32:37.521196303Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 54, + "min_processing_ms": 54, + "max_processing_ms": 54, + "avg_processing_ms": 54, + "median_processing_ms": 54, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2776, + "slowest_section_id": 0, + "slowest_section_time_ms": 55 + } +} diff --git a/data/output/00142065a6c92f788eacd40c9023184808a7e2d1.json b/data/output/00142065a6c92f788eacd40c9023184808a7e2d1.json new file mode 100644 index 0000000..0688b7c --- /dev/null +++ b/data/output/00142065a6c92f788eacd40c9023184808a7e2d1.json @@ -0,0 +1,246 @@ +{ + "file_name": "00142065a6c92f788eacd40c9023184808a7e2d1.txt", + "total_words": 481, + "top_n_words": [ + { + "word": "the", + "count": 38 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "a", + "count": 15 + }, + { + 
"word": "gonzalez", + "count": 12 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "that", + "count": 11 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "for", + "count": 8 + }, + { + "word": "judge", + "count": 8 + }, + { + "word": "he", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "m.", + "length": 2 + }, + { + "text": "Gonzalez is competent to stand trial.", + "length": 37 + }, + { + "text": "Bos told the judge: \"There is no doubt in my mind that Mr.", + "length": 58 + }, + { + "text": "But the result, finding Gonzalez not competent, can't be ignored, Collyer said in court Tuesday.", + "length": 96 + }, + { + "text": "The judge delayed arraignment for Gonzalez on new charges the government filed against him last week.", + "length": 101 + }, + { + "text": "His family has said Gonzalez, an Iraq War veteran, suffers from post-traumatic stress disorder and paranoia.", + "length": 108 + }, + { + "text": "Collyer ordered the mental health screening to be done in 30 days and set a new hearing for December 3 at 10:30 a.", + "length": 114 + }, + { + "text": "The problem, she said, was that sequestration has cut resources for the Bureau of Prisons and finding a bed can take time.", + "length": 122 + }, + { + "text": "The judge suggested that Gonzalez's mental issues, which she didn't describe more fully, could be resolved with medication.", + "length": 123 + }, + { + "text": "The incident came amid a series of disclosures about Secret Service lapses that cost the agency's director, Julia Pierson, her job.", + "length": 131 + }, + { + "text": "Collyer said that during the initial examination, the mental health screener found Gonzalez did understand some parts of the proceedings.", + "length": 137 + }, + { + "text": "David Bos, the federal public defender representing Gonzalez, objected to any examination in the first place because he says Gonzalez is fit for 
trial.", + "length": 151 + }, + { + "text": "The 60-minute initial mental examination of Gonzalez at the District of Columbia jail came as a surprise to the judge and to both the government and defense.", + "length": 157 + }, + { + "text": "She gave one example of an unnamed defendant who sat at the District of Columbia jail for months before anyone noticed he hadn't received the tests that were ordered.", + "length": 166 + }, + { + "text": "\" Nonetheless, he withdrew his objections and agreed to allow his client to undergo a fuller competency examination to try to undo the results of the initial examination.", + "length": 170 + }, + { + "text": "Gonzalez was arrested in September after he allegedly jumped the White House fence and sprinted into the executive mansion, setting off concerns about Secret Service security procedures.", + "length": 186 + }, + { + "text": "The judge also raised concerns that the government's handling of previous unrelated cases could mean it will take some time for Gonzalez to be examined at a federal Bureau of Prisons facility.", + "length": 192 + }, + { + "text": "Washington (CNN) -- A federal judge on Tuesday ordered a full mental competency screening for Omar Gonzalez, who is accused of jumping the White House fence, after a disputed initial examination found him not competent for trial.", + "length": 229 + }, + { + "text": "District Court Judge Rosemary Collyer expressed concern that the initial mental exam, ordered by a magistrate judge, was done before she had a chance to hear a legal motion by the defense disputing whether the magistrate had the authority to order it.", + "length": 251 + }, + { + "text": "He was found with a folding knife and told a Secret Service agent \"that he was concerned that the atmosphere was collapsing and needed to get the information to the President of the United States so that he could get the word out to the people,\" according to an agent's affidavit filed in court.", + "length": 295 + } + ], + 
"sentiment": { + "sentiment": "negative", + "score": 0.5142932534217834 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:38.020712399Z", + "first_section_created": "2025-12-23T09:32:38.021094814Z", + "last_section_published": "2025-12-23T09:32:38.021298023Z", + "all_results_received": "2025-12-23T09:32:38.085646337Z", + "output_generated": "2025-12-23T09:32:38.086005152Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:38.021094814Z", + "publish_time": "2025-12-23T09:32:38.021298023Z", + "first_worker_start": "2025-12-23T09:32:38.021898447Z", + "last_worker_end": "2025-12-23T09:32:38.084501Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:38.021897847Z", + "start_time": "2025-12-23T09:32:38.021989351Z", + "end_time": "2025-12-23T09:32:38.022046053Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:38.022118Z", + "start_time": "2025-12-23T09:32:38.022276Z", + "end_time": "2025-12-23T09:32:38.084501Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:38.021833244Z", + "start_time": "2025-12-23T09:32:38.021898447Z", + "end_time": "2025-12-23T09:32:38.02196825Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:38.021865146Z", + "start_time": "2025-12-23T09:32:38.021927348Z", + "end_time": "2025-12-23T09:32:38.021947949Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2848, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/001496f2cc6c2283fc1756c1d7280d3dee6bfb24.json b/data/output/001496f2cc6c2283fc1756c1d7280d3dee6bfb24.json new file mode 100644 index 0000000..1e54b24 --- /dev/null +++ b/data/output/001496f2cc6c2283fc1756c1d7280d3dee6bfb24.json @@ -0,0 +1,290 @@ +{ + "file_name": "001496f2cc6c2283fc1756c1d7280d3dee6bfb24.txt", + "total_words": 709, + "top_n_words": [ + { + "word": "the", + "count": 47 + }, + { + "word": "a", + "count": 23 + }, + { + "word": "drogba", + "count": 18 + }, + { + "word": "to", + "count": 18 + }, + { + "word": "his", + "count": 14 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "as", + "count": 11 + }, + { + "word": "chelsea", + "count": 11 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "on", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Dan Ripley .", + "length": 12 + }, + { 
+ "text": "What did I do?", + "length": 14 + }, + { + "text": "Follow @@Ripley_77 .", + "length": 20 + }, + { + "text": "VIDEO Chelsea need games - Mourinho .", + "length": 37 + }, + { + "text": "Shortly after, Chelsea had their best chance.", + "length": 45 + }, + { + "text": "VIDEO Scroll down to watch Chelsea need games - Mourinho .", + "length": 59 + }, + { + "text": "Not that you can blame him for having the temptation to shoot in a pre-season exercise.", + "length": 87 + }, + { + "text": "Drogba's afternoon was one of frustration as the Blues played poorly in the pre-season loss .", + "length": 93 + }, + { + "text": "Not your day: Drogba reacts by lifting up his shirt after missing a chance on his Chelsea return .", + "length": 98 + }, + { + "text": "But in a counter attacking move which started from a Bremen corner, Drogba was nowhere to be seen.", + "length": 98 + }, + { + "text": "All in: Drogba got stuck into battles as he challenges Werder Bremen's Oliver Husing for a header .", + "length": 99 + }, + { + "text": "Finding his feet: Drogba was a half-time substitute during Chelsea's 3-0 defeat by the German outfit .", + "length": 102 + }, + { + "text": "Calling the shots: The Ivory Coast striker instructs team-mates during the defeat at the Weserstadion .", + "length": 103 + }, + { + "text": "Nice moves: Drogba showed touches of skills as he turns to shake off attention from Martin Kobylanski .", + "length": 103 + }, + { + "text": "No way past: Theodor Gebre Selassie makes a tackle on Drogba who failed to hit the target on his return .", + "length": 106 + }, + { + "text": "Guess who's back: Didier Drogba made his second Chelsea debut in the pre-season friendly at Werder Bremen .", + "length": 107 + }, + { + "text": "All yours, skip: Cesc Fabregas gave Drogba the captain's armband later in the game after John Terry departed .", + "length": 110 + }, + { + "text": "Fans will hope a couple more weeks of pre-season training will get him back up to speed with 
the Premier League.", + "length": 112 + }, + { + "text": "With 15 minutes to play, the former Marseille hitman had his brightest moment of the half as he collected a Fabregas pass.", + "length": 122 + }, + { + "text": "A quick breakaway saw substitutes Nathan Ake and Eden Hazard combine before squaring a pass for Cesc Fabregas to chip over the crossbar.", + "length": 136 + }, + { + "text": "He was straight into the action too as he just missed out in latching on to a Fabregas pass - signs perhaps that the legs may be starting to show their age.", + "length": 156 + }, + { + "text": "Like Costa before him, Drogba simply couldn't make it work with a support cast of Torres and Salah and the duo were replaced 12 minutes into the second half.", + "length": 157 + }, + { + "text": "It looked like vintage Drogba was back, but at an angle to goal, he rifled off target at the near post with options open to him inside the box for a pass.", + "length": 157 + }, + { + "text": "Didier Drogba's previous game in a Chelsea shirt proved rather more dramatic as his final kick in a Blues jersey resulted in them winning the 2012 Champions League.", + "length": 164 + }, + { + "text": "Fans will never forget how Drogba ended his first Chelsea spell in Munich but just as many will soon forget his rather anonymous showing in Bremen on his return two years later.", + "length": 177 + }, + { + "text": "But there was to be no penalty kick heroics from the Ivorian on his return to the club, as the veteran striker drew a blank in his first game back for the Stamford Bridge outfit.", + "length": 178 + }, + { + "text": "But despite an early flurry, Chelsea failed to turn a decent spell on the ball into any serious sort of pressure and Drogba quickly faded out of the game in the central attacking role.", + "length": 184 + }, + { + "text": "A highly ineffective first half from the trio led to Jose Mourinho making changes at the break, and following a poor first half showing, expectations rose as 
Drogba replaced Costa for the second period.", + "length": 202 + }, + { + "text": "By that point the Blues were already 2-0 down but Drogba certainly seemed up for it as he took to the pitch wearing his old No 15 shirt, pointing to the sky with both hands before the second half resumed.", + "length": 207 + }, + { + "text": "Drogba was constantly marshalled by Bremen's Assani Lukimya and it wasn't until the 70th minute until he got back into the action - albeit with a clever run into the box ending with his cut-back easily cleared.", + "length": 211 + }, + { + "text": "Drogba ended the game as captain after John Terry departed late on, but rather than help lead a fightback, Chelsea conceded again a minute before time with the hosts scoring their second penalty of the afternoon.", + "length": 212 + }, + { + "text": "Chelsea left the 36-year-old on the bench for the start of their pre-season friendly at Werder Bremen's Weserstadion, with the Blues fielding an attacking trio of Diego Costa being supported by Fernando Torres and Mohamed Salah.", + "length": 228 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4249998927116394 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:38.522056569Z", + "first_section_created": "2025-12-23T09:32:38.523964246Z", + "last_section_published": "2025-12-23T09:32:38.524207856Z", + "all_results_received": "2025-12-23T09:32:38.592782442Z", + "output_generated": "2025-12-23T09:32:38.59296185Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:38.523964246Z", + "publish_time": "2025-12-23T09:32:38.524207856Z", + "first_worker_start": "2025-12-23T09:32:38.524714877Z", + "last_worker_end": "2025-12-23T09:32:38.591785Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": 
"sentencesort", + "receive_time": "2025-12-23T09:32:38.524674675Z", + "start_time": "2025-12-23T09:32:38.524737678Z", + "end_time": "2025-12-23T09:32:38.524829081Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:38.524963Z", + "start_time": "2025-12-23T09:32:38.5251Z", + "end_time": "2025-12-23T09:32:38.591785Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:38.524657374Z", + "start_time": "2025-12-23T09:32:38.524714877Z", + "end_time": "2025-12-23T09:32:38.524811581Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:38.524669975Z", + "start_time": "2025-12-23T09:32:38.524726577Z", + "end_time": "2025-12-23T09:32:38.524809581Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3932, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/0014ad78c460c3164520848edca4065855499355.json b/data/output/0014ad78c460c3164520848edca4065855499355.json new file mode 100644 index 0000000..b745acf --- /dev/null +++ b/data/output/0014ad78c460c3164520848edca4065855499355.json @@ -0,0 +1,290 @@ +{ + "file_name": "0014ad78c460c3164520848edca4065855499355.txt", + "total_words": 775, + "top_n_words": [ + { + "word": "the", + "count": 54 + }, + { + "word": "and", + "count": 22 + }, + { + "word": "to", + "count": 19 + }, + { + "word": "a", + "count": 18 + }, + { + "word": "tag", + "count": 17 + }, + { + "word": "their", + "count": 17 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "new", + "count": 13 + }, + { + "word": "bag", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "Demonstration video below...", + "length": 28 + }, + { + "text": "Heathrow Airport is also helping with the trials.", + "length": 49 + }, + { + "text": "’ He added: ‘British Airways has a long history of innovation.", + "length": 66 + }, + { + "text": "Each time the passenger flies, the electronic bar code on the tag changes.", + "length": 74 + }, + { + "text": "British Airways has begun customer-trials of new re-useable digital bag-tags.", + "length": 77 + }, + { + "text": "The battery switches off once the image is ‘fixed’ and will last around five years.", + "length": 87 + }, + { + "text": "BA said the new system will be quicker and more efficient than the traditional paper tag .", + "length": 90 + }, + { + "text": "At check-in, passengers hold their smartphone, carrying their booking details, over the tag.", + "length": 92 + }, + { + "text": "’ The company said the new system will be quicker and more efficient than the old paper tags.", + "length": 95 + }, + { + "text": "Wrestling 
to rip-off fiendishly sticky paper airline luggage tags could become a thing of the past.", + "length": 99 + }, + { + "text": "British Airways is trialling new electronic bag-tags, which could replace traditional sticky paper ones .", + "length": 105 + }, + { + "text": "Only when the customer checks onto a new flight will the bar code be changed and updated for that service.", + "length": 106 + }, + { + "text": "Guinea-pig passengers will use their smartphone to check in, choose their seat and obtain their mobile boarding pass.", + "length": 117 + }, + { + "text": "Guinea-pig passengers will use their smartphone to check in, choose their seat and obtain their mobile boarding pass .", + "length": 118 + }, + { + "text": "BA said the hi-tech tag can be used ‘time and time again’ with a different bar-code programmed for each new flight.", + "length": 119 + }, + { + "text": "’ The electronic tags have been developed by British Airways in partnership Densitron Displays, and Designworks Windsor.", + "length": 122 + }, + { + "text": "We’re grateful for Microsoft’s support during the trial, which will help us shape the future of checking in for flights.", + "length": 124 + }, + { + "text": "BA’s electronic bag-tag can be used by passengers who have their booking details sent to their smart-phone using the British Airways app.", + "length": 139 + }, + { + "text": "‘The digital bag tag contains all a customer’s baggage details and could eventually do away with the need for a new paper tag every time you fly.", + "length": 149 + }, + { + "text": "Employees from software giant Microsoft using Nokia Lumia Windows Phones have been selected to take part as the first passenger guinea-pigs in the month-long trial.", + "length": 164 + }, + { + "text": "The phone then transmits these details wirelessly to the tag, which then displays them on the mini-screens in the form of a bar-code and in summary form as written words.", + "length": 170 + }, + { + "text": "Each electronic bag-tag 
carries a special computer chip and has two small mini-computer screens on each side, similar to those on an iPhone or Kindle ‘e-reader’ device.", + "length": 172 + }, + { + "text": "BA said that over the next five years  it is investing more than £5billion in new technology, including  its first new Airbus A380 ‘superjumbo’, which arrives next month.", + "length": 177 + }, + { + "text": "The personalised electronic tags eliminate the need for a new paper version every time you fly and are now being tested by passengers flying in and out of Heathrow’s Terminal 5.", + "length": 179 + }, + { + "text": "The personalised electronic tags eliminate the need for a new paper version every time you fly and are now being tested by passengers flying in and out of Heathrow¿s Terminal 5 .", + "length": 179 + }, + { + "text": "The digital bag tag is a tangible demonstration of our commitment to developing new technology and holds the promise of making checking in for flights even quicker and smoother’.", + "length": 180 + }, + { + "text": "’ Glenn Morgan, British Airways’ head of services, said: ‘The customer trials take us another step closer to making the personalised digital bag tag a reality for our customers.", + "length": 183 + }, + { + "text": "‘Comprehensive testing of the tag has already taken place to make sure it works in a live airport environment and can stand up to the rigours of airport baggage systems and everyday travel.", + "length": 191 + }, + { + "text": "The app then automatically updates the digital bag-tag with a unique barcode, containing new flight details and an easy-to-see view of their bag’s destination just by holding the phone over it.", + "length": 195 + }, + { + "text": "’It means customers save precious time by having their electronic tag quickly scanned at the bag drop, going straight through security to relax before catching their flight,’ the spokesman said.", + "length": 198 + }, + { + "text": "A BA spokesman said: ’Once checked in, customers 
just need to hold their smartphone over theelectronic tag, which automatically updates with a unique barcode containing their flight details and an easy-to-see view of their bag’s destination.", + "length": 245 + }, + { + "text": "The electronic screens ‘fix’ the image on the tag for the duration of the trip – allowing it to be scanned electronically when going through luggage handling at departure, on to the plane, and through to the luggage collection carousel in the arrivals.", + "length": 258 + }, + { + "text": "British Airways’ managing director for customers, Frank van der Post, said: ‘This is a fantastically simple, yet smart device that gives each customer the choice to have their own personalised electronic baggage tag that changes with the swipe of a smartphone – every time they fly.", + "length": 288 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5372670292854309 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:39.025028205Z", + "first_section_created": "2025-12-23T09:32:39.02539372Z", + "last_section_published": "2025-12-23T09:32:39.02565153Z", + "all_results_received": "2025-12-23T09:32:39.0888933Z", + "output_generated": "2025-12-23T09:32:39.089073107Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:39.02539372Z", + "publish_time": "2025-12-23T09:32:39.02565153Z", + "first_worker_start": "2025-12-23T09:32:39.026158651Z", + "last_worker_end": "2025-12-23T09:32:39.087975Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:39.026162351Z", + "start_time": "2025-12-23T09:32:39.026235754Z", + "end_time": "2025-12-23T09:32:39.026321757Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": 
"sentiment", + "receive_time": "2025-12-23T09:32:39.026304Z", + "start_time": "2025-12-23T09:32:39.026477Z", + "end_time": "2025-12-23T09:32:39.087975Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:39.026086748Z", + "start_time": "2025-12-23T09:32:39.026158651Z", + "end_time": "2025-12-23T09:32:39.026250454Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:39.02613065Z", + "start_time": "2025-12-23T09:32:39.026208853Z", + "end_time": "2025-12-23T09:32:39.026265055Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4663, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/0014c9152df6d0221bb4b3e5e7d3e5679345ef4e.json 
b/data/output/0014c9152df6d0221bb4b3e5e7d3e5679345ef4e.json new file mode 100644 index 0000000..8aa2d95 --- /dev/null +++ b/data/output/0014c9152df6d0221bb4b3e5e7d3e5679345ef4e.json @@ -0,0 +1,242 @@ +{ + "file_name": "0014c9152df6d0221bb4b3e5e7d3e5679345ef4e.txt", + "total_words": 597, + "top_n_words": [ + { + "word": "the", + "count": 31 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "data", + "count": 11 + }, + { + "word": "google", + "count": 10 + }, + { + "word": "it", + "count": 9 + }, + { + "word": "its", + "count": 9 + }, + { + "word": "a", + "count": 8 + }, + { + "word": "after", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "’ A Google spokesman said: ‘We’re pleased that the ICO has decided to close its investigation.", + "length": 100 + }, + { + "text": "Steve Eckersley, the ICO’s head of enforcement, said the undertaking marked a ‘significant step forward’.", + "length": 111 + }, + { + "text": "The company, which owns YouTube and G-Mail, has signed a formal undertaking that it would begin making the changes by June 30 .", + "length": 128 + }, + { + "text": "The watchdog said the new agreement would have an impact not just on Google but also on other companies which gather information online.", + "length": 136 + }, + { + "text": "We have agreed improvements to our privacy policy and will continue to work constructively with the Commissioner and his team in the future.", + "length": 140 + }, + { + "text": "The regulator - along with its continental counterparts - began looking into Google after its controversial privacy policy update in March 2012.", + "length": 144 + }, + { + "text": "Changing terms and conditions, so they are accessible and easy to understand is a critical part of our ongoing relationship with technology companies.", + "length": 150 + }, + { + "text": "It came after it was discovered that personal data had been deliberately downloaded 
from wi-fi networks using sophisticated equipment in the vehicles.", + "length": 150 + }, + { + "text": "Separate documents on how Google would use data collected from each of its 70 websites, including YouTube and G-Mail, were condensed into a single file.", + "length": 152 + }, + { + "text": "Britain’s data protection watchdog said the internet search giant was ‘too vague’ about the vast amounts of information being gathered about web users.", + "length": 157 + }, + { + "text": "The development comes two years after the ICO ruled the company’s guidelines must be overhauled because they were baffling and did not comply with UK laws.", + "length": 157 + }, + { + "text": "Google has been forced to change its privacy rules after it was rapped across the knuckles for illegally leaving people in the dark about how their personal details were collected and used.", + "length": 189 + }, + { + "text": "Privacy campaigners have raised serious concerns about the way online companies, particularly search engines and social media sites such as Facebook, collect and use data about their customers.", + "length": 193 + }, + { + "text": "Google has been ordered to rewrite its privacy policy after data protection watchdog ordered it to make its guidelines clearer, after deeming them 'too vague' and 'baffling' for ordinary people .", + "length": 195 + }, + { + "text": "The firm has agreed rewrite its policies to make it easier for internet users to find out how their data was hoovered up and stored after pressure from the Information Commissioner’s Office (ICO).", + "length": 198 + }, + { + "text": "Google had faced a fine of up to £500,000 or a court order – but now Kent Walker, the firm’s senior vice-president, has signed a formal undertaking that it would begin making the changes by June 30.", + "length": 203 + }, + { + "text": "But the ICO said the details were watered down, and ordinary people would have no idea after reading the file how their personal details, such as email 
addresses or website viewing history, would be used by Google.", + "length": 214 + }, + { + "text": "’ In July 2013, Google provoked fury when it admitted failing to destroy all sensitive information stolen from unsecured home computers by its Street View cars - harvested from millions of unsuspecting UK households .", + "length": 219 + }, + { + "text": "Now the company has vowed to provide ‘unambiguous and comprehensive information regarding data processing, including an exhaustive list of the types of data processed by Google and the purposes for which data is processed’.", + "length": 227 + }, + { + "text": "’ Renate Samson, chief executive of privacy campaigners Big Brother Watch, said: ‘As more of our lives are connected online clearly understanding where our data is being used, who can gain access and what level of control we have over our privacy and security is vital.", + "length": 273 + }, + { + "text": "He said: ‘Whilst our investigation concluded that this case hasn’t resulted in substantial damage and distress to consumers, it is still important for organisations to properly understand the impact of their actions and the requirement to comply with data protection law.", + "length": 275 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6378268599510193 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:39.526435777Z", + "first_section_created": "2025-12-23T09:32:39.526721089Z", + "last_section_published": "2025-12-23T09:32:39.526942098Z", + "all_results_received": "2025-12-23T09:32:39.583723005Z", + "output_generated": "2025-12-23T09:32:39.583913713Z", + "total_processing_time_ms": 57, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 56, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:39.526721089Z", + "publish_time": "2025-12-23T09:32:39.526942098Z", + "first_worker_start": "2025-12-23T09:32:39.527429818Z", + 
"last_worker_end": "2025-12-23T09:32:39.582828Z", + "total_journey_time_ms": 56, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:39.527371015Z", + "start_time": "2025-12-23T09:32:39.527429818Z", + "end_time": "2025-12-23T09:32:39.527512521Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:39.527727Z", + "start_time": "2025-12-23T09:32:39.527869Z", + "end_time": "2025-12-23T09:32:39.582828Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 54 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:39.527371015Z", + "start_time": "2025-12-23T09:32:39.527444418Z", + "end_time": "2025-12-23T09:32:39.527563123Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:39.527389416Z", + "start_time": "2025-12-23T09:32:39.527456619Z", + "end_time": "2025-12-23T09:32:39.527520721Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 54, + "min_processing_ms": 54, + "max_processing_ms": 54, + "avg_processing_ms": 54, + "median_processing_ms": 54, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 
1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3728, + "slowest_section_id": 0, + "slowest_section_time_ms": 56 + } +} diff --git a/data/output/0015194573f9b4430319683cde41e4aa17092a9d.json b/data/output/0015194573f9b4430319683cde41e4aa17092a9d.json new file mode 100644 index 0000000..496f270 --- /dev/null +++ b/data/output/0015194573f9b4430319683cde41e4aa17092a9d.json @@ -0,0 +1,202 @@ +{ + "file_name": "0015194573f9b4430319683cde41e4aa17092a9d.txt", + "total_words": 172, + "top_n_words": [ + { + "word": "the", + "count": 13 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "on", + "count": 6 + }, + { + "word": "shark", + "count": 6 + }, + { + "word": "attack", + "count": 4 + }, + { + "word": "maui", + "count": 4 + }, + { + "word": "beach", + "count": 3 + }, + { + "word": "to", + "count": 3 + }, + { + "word": "was", + "count": 3 + }, + { + "word": "with", + "count": 3 + } + ], + "sorted_sentences": [ + { + "text": "Best places to swim with sharks .", + "length": 33 + }, + { + "text": "Shark found on New York subway car .", + "length": 36 + }, + { + "text": "Shark attack claims Brazilian teen's life .", + "length": 43 + }, + { + "text": "Discovery Channel defends dramatized shark special .", + "length": 52 + }, + { + "text": "The attack took place about 50 yards offshore at White Rock beach in Maui.", + "length": 74 + }, + { + "text": "Officials will assess on Thursday morning whether the beach can be reopened.", + "length": 76 + }, + { + "text": "The beach has been closed one mile on either side of where the attack happened.", + "length": 79 + }, + { + "text": "This shark attack is the fourth in Maui this year, with two happening on the same day in February, and the other in late July.", + "length": 126 + }, + { + "text": "(CNN) -- A German tourist was in 
critical condition after a shark severed her right arm while she snorkeled in Hawaii on Wednesday, authorities said.", + "length": 149 + }, + { + "text": "While shark attacks have been on the uptick in recent years, according to the University of Florida, the fatality rate in the United States is just 2%.", + "length": 151 + }, + { + "text": "The approximately 20-year-old woman, who was unconscious when first responders arrived, was taken to Maui Medical Center for treatment, according Lee Mainaga with the Maui Fire Department.", + "length": 188 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7471381425857544 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:40.027546138Z", + "first_section_created": "2025-12-23T09:32:40.027884651Z", + "last_section_published": "2025-12-23T09:32:40.02809336Z", + "all_results_received": "2025-12-23T09:32:40.120449212Z", + "output_generated": "2025-12-23T09:32:40.120579618Z", + "total_processing_time_ms": 93, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 92, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:40.027884651Z", + "publish_time": "2025-12-23T09:32:40.02809336Z", + "first_worker_start": "2025-12-23T09:32:40.028662083Z", + "last_worker_end": "2025-12-23T09:32:40.119447Z", + "total_journey_time_ms": 91, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:40.028618981Z", + "start_time": "2025-12-23T09:32:40.028682384Z", + "end_time": "2025-12-23T09:32:40.028709185Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:40.028914Z", + "start_time": "2025-12-23T09:32:40.029054Z", + "end_time": "2025-12-23T09:32:40.119447Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 90 + }, + "topn": { + "worker_type": "topn", + "receive_time": 
"2025-12-23T09:32:40.028620881Z", + "start_time": "2025-12-23T09:32:40.028670583Z", + "end_time": "2025-12-23T09:32:40.028701185Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:40.02859868Z", + "start_time": "2025-12-23T09:32:40.028662083Z", + "end_time": "2025-12-23T09:32:40.028677084Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 90, + "min_processing_ms": 90, + "max_processing_ms": 90, + "avg_processing_ms": 90, + "median_processing_ms": 90, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1017, + "slowest_section_id": 0, + "slowest_section_time_ms": 91 + } +} diff --git a/data/output/001531a96d8e987e2206be1440d5babecf8f5596.json b/data/output/001531a96d8e987e2206be1440d5babecf8f5596.json new file mode 100644 index 0000000..7a3abba --- /dev/null +++ b/data/output/001531a96d8e987e2206be1440d5babecf8f5596.json @@ -0,0 +1,230 @@ +{ + "file_name": "001531a96d8e987e2206be1440d5babecf8f5596.txt", + 
"total_words": 414, + "top_n_words": [ + { + "word": "the", + "count": 26 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "murray", + "count": 7 + }, + { + "word": "to", + "count": 7 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "his", + "count": 6 + }, + { + "word": "i", + "count": 6 + }, + { + "word": "is", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "' He added: 'Andy and I have been friends for nearly 15 years.", + "length": 62 + }, + { + "text": "'I'm quite proud of how we pulled it around after the US Open.", + "length": 62 + }, + { + "text": "'We were all pulling in the same direction,' he told the Daily Mail.", + "length": 68 + }, + { + "text": "Maybe the last four or five months of last year it wasn't like that.", + "length": 68 + }, + { + "text": "Murray celebrates beating Joao Sousa in the men's singles third-round match at the Australian Open .", + "length": 100 + }, + { + "text": "'Of course Andy is the one who played the matches but everyone had the goal of helping him get there.", + "length": 101 + }, + { + "text": "It was pretty hard, him qualifying for (the ATP Tour Finals in) London from his position after New York.", + "length": 104 + }, + { + "text": "' Murray (left) leans on his racquet next to trainer Valverdu (right) during a practice session in 2012 .", + "length": 105 + }, + { + "text": "Andy Murray in action against Portugal's Joao Sousa on Day Five of the Australian Open at Melbourne Park .", + "length": 106 + }, + { + "text": "I don't think the last four months are a reason we aren't working together any more, it was genuinely mutual.", + "length": 109 + }, + { + "text": "Dani Vallverdu has denied a rift with Andy Murray and insists their working relationship was ended mutually .", + "length": 109 + }, + { + "text": "Dani Vallverdu has played down talk of a rift with Andy Murray insisting the end of their working partnership 
was 'genuinely mutual'.", + "length": 133 + }, + { + "text": "' But Vallverdu, who is now coaching Tomas Berdych, insists that was not the case and that the relationship had simply run its course.", + "length": 134 + }, + { + "text": "Our personal relationship is still good, I consider him one of my best friends and I'll always be in his corner, except for when I am coaching against him.", + "length": 155 + }, + { + "text": "'His determination, professionalism and stubbornness, that's what has made him so successful but, because of the way he is, it gets to the point when it is time for both parties to go their own ways.", + "length": 199 + }, + { + "text": "The exit of Vallverdu raised some eyebrows as the pair met at the Sanchez-Casal Academy in Spain more than a decade ago and the Venezuelan held an increasingly important role in the coaching team under Lendl.", + "length": 208 + }, + { + "text": "Murray made a host of changes to his group last season, hiring Amelie Mauresmo as coach to replace Ivan Lendl who left his post in March and then stopping working with long-time friend Vallverdu and fitness trainer Jez Green.", + "length": 225 + }, + { + "text": "In an interview with The Independent earlier this month, Murray appeared to suggest that the relationship had become strained, saying: 'The most important point in any team is that everyone has the same vision, everyone wants to move forward together.", + "length": 251 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4254903197288513 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:40.528903808Z", + "first_section_created": "2025-12-23T09:32:40.529254822Z", + "last_section_published": "2025-12-23T09:32:40.52943453Z", + "all_results_received": "2025-12-23T09:32:40.590872826Z", + "output_generated": "2025-12-23T09:32:40.591311444Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + 
"aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:40.529254822Z", + "publish_time": "2025-12-23T09:32:40.52943453Z", + "first_worker_start": "2025-12-23T09:32:40.529980052Z", + "last_worker_end": "2025-12-23T09:32:40.589976Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:40.52992395Z", + "start_time": "2025-12-23T09:32:40.529980052Z", + "end_time": "2025-12-23T09:32:40.530049355Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:40.530087Z", + "start_time": "2025-12-23T09:32:40.530215Z", + "end_time": "2025-12-23T09:32:40.589976Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:40.529905849Z", + "start_time": "2025-12-23T09:32:40.529982052Z", + "end_time": "2025-12-23T09:32:40.530046955Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:40.52992735Z", + "start_time": "2025-12-23T09:32:40.529985552Z", + "end_time": "2025-12-23T09:32:40.530020654Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2313, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/001553918ab13ec4d55b7adf9175ba021fc1f7f8.json b/data/output/001553918ab13ec4d55b7adf9175ba021fc1f7f8.json new file mode 100644 index 0000000..d91805b --- /dev/null +++ b/data/output/001553918ab13ec4d55b7adf9175ba021fc1f7f8.json @@ -0,0 +1,278 @@ +{ + "file_name": "001553918ab13ec4d55b7adf9175ba021fc1f7f8.txt", + "total_words": 420, + "top_n_words": [ + { + "word": "and", + "count": 16 + }, + { + "word": "he", + "count": 15 + }, + { + "word": "the", + "count": 14 + }, + { + "word": "was", + "count": 14 + }, + { + "word": "ramirez", + "count": 12 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "joshua", + "count": 9 + }, + { + "word": "family", + "count": 8 + }, + { + "word": "his", + "count": 8 + }, + { + "word": "in", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Steve Robson .", + "length": 14 + }, + { + "text": "He died on March 17.", + "length": 20 + }, + { + "text": "He was that kind of kid.", + "length": 24 + }, + { + "text": "He was that kind of kid.", + "length": 24 + }, + { + "text": "09:00 EST, 22 March 2013 .", + "length": 26 + }, + { + "text": "13:35 EST, 22 March 2013 .", + "length": 26 + }, + { + "text": "He loved his Lord and his family.", + "length": 33 + }, 
+ { + "text": "His family said in an obituary: 'Josh was born Feb.", + "length": 51 + }, + { + "text": "His family said in an obituary: 'Josh was born Feb.", + "length": 51 + }, + { + "text": "10, 1998, in Hereford, to Joshua John and Alma Silva Ramirez.", + "length": 61 + }, + { + "text": "10, 1998, in Hereford, to Joshua John and Alma Silva Ramirez.", + "length": 61 + }, + { + "text": "' Family friend Saul Tarango told KAMR-TV: 'He never complained.", + "length": 64 + }, + { + "text": "' Family friend Saul Tarango told KAMR-TV: 'He never complained.", + "length": 64 + }, + { + "text": "We understand that Joshua was a great kid in and out of the classroom.", + "length": 70 + }, + { + "text": "Tribute: This poster has been sent out by friends and family of Joshua Lorenzo Ramirez .", + "length": 88 + }, + { + "text": "' Joshua's uncle Jesse Ramirez told the Amarillo Globe-News, 'We’re not sure exactly what happened.", + "length": 101 + }, + { + "text": "A teenage athlete has died after being hit by a discus during the warm up for a track and field event.", + "length": 102 + }, + { + "text": "'Staff and students alike are heart broken for the Ramirez family and our loss of a student and friend.", + "length": 103 + }, + { + "text": "' He is survived by three siblings, a brother, Isaac Ramirez; and two sisters, Teresa Ramirez and Krystal Ramirez.", + "length": 114 + }, + { + "text": "Joshua went to a follow up visit two days later, on March 10, and eventually went to the hospital where he was treated and released for a bruised hip.", + "length": 150 + }, + { + "text": "Strange death: Ramirez was buried on Thursday after he passed away from complications connected with a bizarre discus accident at an area track meet .", + "length": 150 + }, + { + "text": "Joshua Ramirez, 15, was reportedly performing stretches before the competition at Plainview High School in Houston, Texas, when he was hit on the hip.", + "length": 150 + }, + { + "text": "His school's website says 
Joshua then started to feel unbearable pain and was taken to the Emergency Room on March 12, where he admitted into intensive care.", + "length": 157 + }, + { + "text": "Tragic loss: Ramirez, top left, was stretching before one of his events at a track meet on March 8 when he was struck in the hip with an errant discus throw .", + "length": 158 + }, + { + "text": "High school officials say he was initially checked out by an athletic trainer who examined the injury and advised him to go the hospital if he started to feel increased pain.", + "length": 174 + }, + { + "text": "He loved his Lord and his family' Tragedy: Joshua Ramirez was hit in the hip by the metal discus during the warm up for an athletics event (file photo) A statement issued by Hereford Independent Schools District reads: 'Hereford ISD is deeply saddened at this loss.", + "length": 265 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4775782823562622 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:41.030245778Z", + "first_section_created": "2025-12-23T09:32:41.030630694Z", + "last_section_published": "2025-12-23T09:32:41.030804801Z", + "all_results_received": "2025-12-23T09:32:41.100536434Z", + "output_generated": "2025-12-23T09:32:41.10067894Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:41.030630694Z", + "publish_time": "2025-12-23T09:32:41.030804801Z", + "first_worker_start": "2025-12-23T09:32:41.031331622Z", + "last_worker_end": "2025-12-23T09:32:41.099643Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:41.031333122Z", + "start_time": "2025-12-23T09:32:41.031391525Z", + "end_time": "2025-12-23T09:32:41.031460427Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + 
}, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:41.031509Z", + "start_time": "2025-12-23T09:32:41.031655Z", + "end_time": "2025-12-23T09:32:41.099643Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:41.03127362Z", + "start_time": "2025-12-23T09:32:41.031357123Z", + "end_time": "2025-12-23T09:32:41.031547931Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:41.03127392Z", + "start_time": "2025-12-23T09:32:41.031331622Z", + "end_time": "2025-12-23T09:32:41.031355323Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2354, + "slowest_section_id": 0, + "slowest_section_time_ms": 69 + } +} diff --git 
a/data/output/001557fec3484c25297a1fd0db95a1592945ea5b.json b/data/output/001557fec3484c25297a1fd0db95a1592945ea5b.json new file mode 100644 index 0000000..e106292 --- /dev/null +++ b/data/output/001557fec3484c25297a1fd0db95a1592945ea5b.json @@ -0,0 +1,274 @@ +{ + "file_name": "001557fec3484c25297a1fd0db95a1592945ea5b.txt", + "total_words": 424, + "top_n_words": [ + { + "word": "the", + "count": 32 + }, + { + "word": "in", + "count": 19 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "london", + "count": 9 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "s", + "count": 7 + }, + { + "word": "with", + "count": 7 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "that", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "co.", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "com/kwhi02 .", + "length": 12 + }, + { + "text": "Travelmail Reporter .", + "length": 21 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "uk/ Facebook: facebook.", + "length": 23 + }, + { + "text": "’ Video courtesy: robwhitworth.", + "length": 33 + }, + { + "text": "According to a new research, London is .", + "length": 40 + }, + { + "text": "com/RobWhitworthPhotography/ Twitter: twitter.", + "length": 46 + }, + { + "text": "7million visitors in 2014, according to a new poll .", + "length": 52 + }, + { + "text": "Surging to the front: London can expect to welcome 18.", + "length": 54 + }, + { + "text": "2014, eclipsing the likes of New York, Rome, Paris, Beijing and Dubai.", + "length": 70 + }, + { + "text": "likely to pull in more visitors than any other city on the planet in .", + "length": 70 + }, + { + "text": "7million visitors in 2014, placing it at the top of the international tree.", + "length": 75 + }, + { + "text": "This was the first time in the city’s history that it has achieved a figure this high.", + "length": 88 + }, + { + "text": "The Mastercard 
Global Cities Index report predicts that London will welcome a staggering 18.", + "length": 92 + }, + { + "text": "The Index’s findings largely chime with figures from the Office of National Statistics (ONS).", + "length": 95 + }, + { + "text": "‘With nearly 19 million visitors [predicted] this year, London is the world’s favourite place to visit.", + "length": 107 + }, + { + "text": "Paris comes second in the list, with Istanbul, Barcelona, Amsterdam, Milan, Rome and Vienna also featuring highly.", + "length": 114 + }, + { + "text": "Beating an old rival, just: London may be the most popular city with tourists, but Paris is just behind in second .", + "length": 115 + }, + { + "text": "The ONS ‘International Passenger Survey’ for last year shows that London greeted over 16 million visitors in 2013.", + "length": 118 + }, + { + "text": "’ ‘Our city perfectly combines history, heritage, arts and culture – not to mention vast amounts of green space and major events.", + "length": 135 + }, + { + "text": "This figure also suggests that London will out-perform Bangkok – which was ranked highest in the 2013 survey – by some 300,000 incoming tourists.", + "length": 149 + }, + { + "text": "The findings have been welcomed in the UK capital, with Mayor of London Boris Johnson declaring them to be a tribute to the city’s past and present.", + "length": 150 + }, + { + "text": "This is the fourth edition of the poll – which looks at travel patterns in 132 major cities around the world, and ranks them in terms of popularity.", + "length": 150 + }, + { + "text": "The Thai capital Bangkok drops from first place in 2013 to third in 2014, but still keeps its nose ahead of Singapore and Dubai in fourth and fifth respectively.", + "length": 161 + }, + { + "text": "This year’s results make pleasing reading for tourist authorities in Europe – with the continent able to boast eight of the top 20 most popular tourist cities.", + "length": 163 + }, + { + "text": "In the last two years, it 
has hosted the Olympic Games, the Queen’s Diamond Jubilee celebrations and the Tour de France – events that have left it firmly in the public eye.", + "length": 176 + }, + { + "text": "And it seems that London’s position as a magnet for visitors is likely to continue, with the UK capital being ranked as the most popular destination in the world for tourists.", + "length": 177 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4231855273246765 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:41.531529345Z", + "first_section_created": "2025-12-23T09:32:41.53187986Z", + "last_section_published": "2025-12-23T09:32:41.532154171Z", + "all_results_received": "2025-12-23T09:32:41.597406422Z", + "output_generated": "2025-12-23T09:32:41.59759753Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:41.53187986Z", + "publish_time": "2025-12-23T09:32:41.532154171Z", + "first_worker_start": "2025-12-23T09:32:41.532587388Z", + "last_worker_end": "2025-12-23T09:32:41.595897Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:41.532608889Z", + "start_time": "2025-12-23T09:32:41.532672092Z", + "end_time": "2025-12-23T09:32:41.532736194Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:41.532866Z", + "start_time": "2025-12-23T09:32:41.533Z", + "end_time": "2025-12-23T09:32:41.595897Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:41.532532986Z", + "start_time": "2025-12-23T09:32:41.532594689Z", + "end_time": "2025-12-23T09:32:41.532701493Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 
+ }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:41.532527986Z", + "start_time": "2025-12-23T09:32:41.532587388Z", + "end_time": "2025-12-23T09:32:41.53262639Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2536, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/00156d9892fb27f1d2e100cbdd8a3997f8273781.json b/data/output/00156d9892fb27f1d2e100cbdd8a3997f8273781.json new file mode 100644 index 0000000..9c8e785 --- /dev/null +++ b/data/output/00156d9892fb27f1d2e100cbdd8a3997f8273781.json @@ -0,0 +1,230 @@ +{ + "file_name": "00156d9892fb27f1d2e100cbdd8a3997f8273781.txt", + "total_words": 304, + "top_n_words": [ + { + "word": "the", + "count": 21 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "have", + "count": 
7 + }, + { + "word": "people", + "count": 7 + }, + { + "word": "of", + "count": 6 + }, + { + "word": "said", + "count": 6 + }, + { + "word": "cantaloupes", + "count": 5 + }, + { + "word": "cdc", + "count": 5 + }, + { + "word": "also", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "Food Poisoning 101 .", + "length": 20 + }, + { + "text": "How to keep your food safe .", + "length": 28 + }, + { + "text": "What you need to know about Listeria .", + "length": 38 + }, + { + "text": "Most of those who fell ill are more than 60 years old, the CDC said.", + "length": 68 + }, + { + "text": "By now, the cantaloupes should all be off store shelves, the CDC said.", + "length": 70 + }, + { + "text": "At least 84 people in 19 states have become ill with the bacteria, the agency said.", + "length": 83 + }, + { + "text": "Five people have died in New Mexico from eating the tainted cantaloupes, the CDC said.", + "length": 86 + }, + { + "text": "Older adults and people with compromised immune systems are also especially susceptible.", + "length": 88 + }, + { + "text": "The grower, Jensen Farms, issued a recall for its Rocky Ford-brand cantaloupes on September 14.", + "length": 95 + }, + { + "text": "Public health investigators have traced the source of the bacteria to a farm in Granada, Colorado.", + "length": 98 + }, + { + "text": "It also said that consumers should be wary of eating any cantaloupes if they don't know where they came from.", + "length": 109 + }, + { + "text": "Three people died in Colorado, two in Texas and one each in Kansas, Maryland, Missouri, Nebraska and Oklahoma.", + "length": 110 + }, + { + "text": "The agency warned that people should not eat Rocky Ford cantaloupes, even if they have eaten part of one and have not yet fallen ill.", + "length": 133 + }, + { + "text": "And the number of illnesses could still grow, added the CDC, citing reporting lags and how the disease can develop slowly in some people.", + "length": 137 + }, + { + "text": 
"Illnesses have also been reported in Alabama, Arkansas, California, Illinois, Indiana, Montana, North Dakota, Virginia, West Virginia, Wisconsin and Wyoming.", + "length": 157 + }, + { + "text": "On Tuesday, the CDC was reporting 13 deaths and 72 illnesses in what was already then the deadliest food-borne illness outbreak in the United States since 1998.", + "length": 160 + }, + { + "text": "Doctors also are closely monitoring the pregnancies of two women who ate contaminated cantaloupe, with the agency noting that listeriosis can cause miscarriages and stillbirths.", + "length": 177 + }, + { + "text": "(CNN) -- Fifteen people have now died after consuming cantaloupe contaminated with the listeria monocytogenes bacteria, the Centers for Disease Control and Prevention said Friday.", + "length": 179 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6398098468780518 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:42.032967119Z", + "first_section_created": "2025-12-23T09:32:42.03322893Z", + "last_section_published": "2025-12-23T09:32:42.033433538Z", + "all_results_received": "2025-12-23T09:32:42.096115885Z", + "output_generated": "2025-12-23T09:32:42.096253291Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:42.03322893Z", + "publish_time": "2025-12-23T09:32:42.033433538Z", + "first_worker_start": "2025-12-23T09:32:42.033953159Z", + "last_worker_end": "2025-12-23T09:32:42.095184Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:42.034027462Z", + "start_time": "2025-12-23T09:32:42.034096165Z", + "end_time": "2025-12-23T09:32:42.034138567Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + 
"receive_time": "2025-12-23T09:32:42.034167Z", + "start_time": "2025-12-23T09:32:42.034308Z", + "end_time": "2025-12-23T09:32:42.095184Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:42.033888157Z", + "start_time": "2025-12-23T09:32:42.033953159Z", + "end_time": "2025-12-23T09:32:42.034012862Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:42.033928658Z", + "start_time": "2025-12-23T09:32:42.033987361Z", + "end_time": "2025-12-23T09:32:42.034002961Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1853, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/0015a6bcae476c0c92effb7257f5c09b047b25e0.json 
b/data/output/0015a6bcae476c0c92effb7257f5c09b047b25e0.json new file mode 100644 index 0000000..e1d5be5 --- /dev/null +++ b/data/output/0015a6bcae476c0c92effb7257f5c09b047b25e0.json @@ -0,0 +1,282 @@ +{ + "file_name": "0015a6bcae476c0c92effb7257f5c09b047b25e0.txt", + "total_words": 491, + "top_n_words": [ + { + "word": "the", + "count": 32 + }, + { + "word": "of", + "count": 19 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "as", + "count": 9 + }, + { + "word": "rice", + "count": 9 + }, + { + "word": "is", + "count": 7 + }, + { + "word": "field", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "years ago to attract tourists.", + "length": 30 + }, + { + "text": "Visitors flock to the rice field .", + "length": 34 + }, + { + "text": "140 meters long and 100 meters wide.", + "length": 36 + }, + { + "text": "The village began the exhibition 21 .", + "length": 37 + }, + { + "text": "The villagers used nine kinds of rice .", + "length": 39 + }, + { + "text": "This year two giant murals, depicting a .", + "length": 41 + }, + { + "text": "The image of Marilyn Monroe is made from .", + "length": 42 + }, + { + "text": "Monroe, are drawing large crowds to the field.", + "length": 46 + }, + { + "text": "It is part of a famous exhibition in the area.", + "length": 46 + }, + { + "text": "filed in Inakadatemura, Aomori prefecture, northern Japan.", + "length": 58 + }, + { + "text": "the landscape at the exhibition which runs until mid-August.", + "length": 60 + }, + { + "text": "traditional Japanese courtesan and iconic Hollywood actress Marilyn .", + "length": 69 + }, + { + "text": "plants in seven colors to fashion the artworks on the field, which is .", + "length": 71 + }, + { + "text": "nine rice species with seven different colors has popped up on a rice .", + "length": 71 + }, + { + "text": "A group of geisha girls being instructed by 
their teacher, circa 1955 .", + "length": 71 + }, + { + "text": "in northeastern Japan to see the breathtaking artworks stretching across .", + "length": 74 + }, + { + "text": "It towered over the south end of Michigan Avenue in Chicago, Illinois, for two years.", + "length": 85 + }, + { + "text": "The Japanese are paying homage to the star in the form of a multicoloured rice field artwork.", + "length": 93 + }, + { + "text": "The famous moment in the 1955 film The Seven Year Itch is now regarded as an icon of film history.", + "length": 98 + }, + { + "text": "In the adjacent field sits a stunning artwork of a Japanese courtesan complete with intricate dress and flowers.", + "length": 112 + }, + { + "text": "And around 794 the culture of the geisha began to emerge: women who men would visit for romantic and sexual pleasure.", + "length": 117 + }, + { + "text": "As early as late 600s Japan there were female entertainers hosting gatherings, pouring sake and offering company to men.", + "length": 120 + }, + { + "text": "To the left features a stunningly detailed rice artwork of a high-class courtesan, or Oiran, with intricate embroidered dress .", + "length": 127 + }, + { + "text": "The art is made from nine rice species with seven different colours achieving realistic interpretation of light reflection and shadow .", + "length": 135 + }, + { + "text": "Monroe's famous pose was recaptured again back in 2010 in the form on a 26 foot statue called called Forever Marilyn by artist Seward Johnson.", + "length": 142 + }, + { + "text": "Known as 'saburuko' - translated as 'serving girls' - some of these women sold sexual services, while others simply hosted high-class occasions.", + "length": 144 + }, + { + "text": "Marilyn's dress, created by costume designer William Travillaup, sold for a reported £3 million in 2011 and is about to attract interest once more.", + "length": 148 + }, + { + "text": "Marilyn Monroe standing above a subway grating with her crisp white dress 
blowing up is one of the most well-recognised movie scenes of the 20th Century.", + "length": 153 + }, + { + "text": "Women dressed as Orian women wear high wooden clogs accompanied by attendants during a festival show of public procession at Tokyo's Asakusa district in 1999 .", + "length": 159 + }, + { + "text": "Traditional geisha - heavily made-up, immaculately dressed and coquettish entertainers - emerged in 18th century Japan, and these women did not officially sell sex - that being the preserve of a different group of female entertainers known as Oiran.", + "length": 249 + }, + { + "text": "Marylin's famous pose in the 1955 film The Seven Year Itch (left) and the rice recreation in Japan (right) The art achieves realistic interpretation of light reflection and shadow, as well as accurate dimensions - purely through skilled placement of rice seeds.", + "length": 261 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.453900009393692 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:42.534192384Z", + "first_section_created": "2025-12-23T09:32:42.534550999Z", + "last_section_published": "2025-12-23T09:32:42.534758107Z", + "all_results_received": "2025-12-23T09:32:42.5961918Z", + "output_generated": "2025-12-23T09:32:42.596337406Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:42.534550999Z", + "publish_time": "2025-12-23T09:32:42.534758107Z", + "first_worker_start": "2025-12-23T09:32:42.535495937Z", + "last_worker_end": "2025-12-23T09:32:42.595328Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:42.535465136Z", + "start_time": "2025-12-23T09:32:42.535532339Z", + "end_time": "2025-12-23T09:32:42.535615942Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:42.535608Z", + "start_time": "2025-12-23T09:32:42.535757Z", + "end_time": "2025-12-23T09:32:42.595328Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:42.535457336Z", + "start_time": "2025-12-23T09:32:42.535512038Z", + "end_time": "2025-12-23T09:32:42.535607342Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:42.535433735Z", + "start_time": "2025-12-23T09:32:42.535495937Z", + "end_time": "2025-12-23T09:32:42.535533839Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2998, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git 
a/data/output/0015b5e99212ec71c581a87088e602383ef682fa.json b/data/output/0015b5e99212ec71c581a87088e602383ef682fa.json new file mode 100644 index 0000000..4593748 --- /dev/null +++ b/data/output/0015b5e99212ec71c581a87088e602383ef682fa.json @@ -0,0 +1,314 @@ +{ + "file_name": "0015b5e99212ec71c581a87088e602383ef682fa.txt", + "total_words": 719, + "top_n_words": [ + { + "word": "the", + "count": 39 + }, + { + "word": "in", + "count": 29 + }, + { + "word": "to", + "count": 24 + }, + { + "word": "a", + "count": 21 + }, + { + "word": "of", + "count": 17 + }, + { + "word": "have", + "count": 13 + }, + { + "word": "by", + "count": 11 + }, + { + "word": "election", + "count": 10 + }, + { + "word": "regime", + "count": 10 + }, + { + "word": "al", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Darren Boyle .", + "length": 14 + }, + { + "text": "I can't escape.", + "length": 15 + }, + { + "text": "their personal opinion, in a totally transparent way.", + "length": 53 + }, + { + "text": "As a result, rebel forces have called on Syrians to boycott the poll.", + "length": 69 + }, + { + "text": "Opponents to the current regime have been banned from running for election.", + "length": 75 + }, + { + "text": "Thousands of foreign fighters have travelled to Syria to join the insurgency.", + "length": 77 + }, + { + "text": "Among those involved in the fighting are large numbers of hard-line jihadists.", + "length": 78 + }, + { + "text": "Some of the trenches discovered by Syrian soldiers are more than eight feet deep .", + "length": 82 + }, + { + "text": "The election is being monitored by observers from North Korea, Brazil, Russia and Iran.", + "length": 87 + }, + { + "text": "But following a brutal crackdown, the situation deteriorated into a full-scale civil war.", + "length": 89 + }, + { + "text": "Syria has been ruled for the past 50 years by the Baath party, who are supporting al-Assad.", + "length": 91 + }, + { + "text": 
"Almost 2,000 people - a quarter of the population - have been killed in the heavy fighting .", + "length": 92 + }, + { + "text": "In one such airstrike, this building in Aleppo was destroyed by fighter-bombers earlier today .", + "length": 95 + }, + { + "text": "However, in areas such as Aleppo, the Syrian regime are launching airstrikes on opposition areas.", + "length": 97 + }, + { + "text": "' Peaceful opposition to the al-Assad regime started in March 2011 with Arab Spring-inspired street protests.", + "length": 109 + }, + { + "text": "Rebels used basements of adjoining buildings as a subterranean highway for moving between strategic locations .", + "length": 111 + }, + { + "text": "A Syrian army soldier secures the entrance to one of the tunnels in Jobar, in the eastern suburbs of Damascus .", + "length": 111 + }, + { + "text": "Rebels placed mirror in sections of their tunnels so they could monitor whether pro-regime forces were approaching .", + "length": 116 + }, + { + "text": "Pro-government forces today discovered a series of tunnels dug by rebels in advance of tomorrow's planned election .", + "length": 116 + }, + { + "text": "Assad faces two opponents for president, Maher al-Hajjar and Hassan al-Nuri, although neither is widely known in Syria.", + "length": 119 + }, + { + "text": "Opponents have branded the poll a 'blood election' as an estimated 162,000 people have been killed during the conflict.", + "length": 119 + }, + { + "text": "The tunnels allowed rebels to move unseen below ground and away from fighter bombers and helicopters patrolling the area .", + "length": 122 + }, + { + "text": "A pro-government newspaper has said that the regime has launched a major security operation in advance of tomorrow's poll .", + "length": 123 + }, + { + "text": "A Syrian soldier armed with a Kalashnikov AK47 equipped with a sniper scope guards the area being searched by his comrades .", + "length": 124 + }, + { + "text": "On Monday, ten people were killed in 
Haraqui when a truck bomb exploded in the village which is loyal to the current regime.", + "length": 124 + }, + { + "text": "In theory, 15 million people are entitled to vote but polling stations are only being opened in areas loyal to the current regime.", + "length": 130 + }, + { + "text": "Al-Assad's Information Minister Omran al-Zohbi said: 'The presidential election is a genuine occasion for all Syrians to express...", + "length": 131 + }, + { + "text": "A Syrian election branded as a farce by opponents of President Bashar al-Assad is planned for tomorrow despite the ongoing civil war.", + "length": 133 + }, + { + "text": "Pro-regime newspaper Al-Watan said: 'Military and security forces are on maximum alert to ensure the security of Syrians who wish to vote.", + "length": 138 + }, + { + "text": "This comes as Government forces have discovered secret tunnels used by rebels to move explosives and arms in advance of tomorrow's election.", + "length": 140 + }, + { + "text": "Rebels dug tunnels and blasted holes in buildings in the eastern outskirts of Damascus where they were preparing to attack government forces .", + "length": 142 + }, + { + "text": "' Large posters featuring President Bashar al-Assad have been erected in areas loyal to the regime such as Damascus in advance of tomorrow's election .", + "length": 151 + }, + { + "text": "Election observers from North Korea, Iran, Russia and Brazil have arrived in Syria to monitor the poll, which has been branded as a farce by opponents .", + "length": 152 + }, + { + "text": "The Syrian army believes that he tunnels would have been used in a major operation tomorrow attacking voters trying to cast their ballots in the election .", + "length": 155 + }, + { + "text": "Rebel forces have used tunnels to place explosives under government-held strong points using methods first developed during the First World War on the Western Front .", + "length": 166 + }, + { + "text": "One civil servant living in Damascus said he 
had no choice when it came to casting his ballot: 'I have to, because there is a voting station in the building where I work.", + "length": 170 + }, + { + "text": "' The regime claims that more than 9,000 polling stations are being protected although there are claims that rebel forces may target electors queuing to cast their ballots.", + "length": 172 + }, + { + "text": "In Aleppo, which has been the scene of intense fighting, 50 people, including nine children, were killed in mortar attacks believed to have been fired by rebels, according to the Syrian Observatory for Human Rights.", + "length": 215 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7474076151847839 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:43.035552812Z", + "first_section_created": "2025-12-23T09:32:43.035919427Z", + "last_section_published": "2025-12-23T09:32:43.036131436Z", + "all_results_received": "2025-12-23T09:32:43.094843716Z", + "output_generated": "2025-12-23T09:32:43.095332536Z", + "total_processing_time_ms": 59, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:43.035919427Z", + "publish_time": "2025-12-23T09:32:43.036131436Z", + "first_worker_start": "2025-12-23T09:32:43.036630056Z", + "last_worker_end": "2025-12-23T09:32:43.093941Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:43.036668558Z", + "start_time": "2025-12-23T09:32:43.036779662Z", + "end_time": "2025-12-23T09:32:43.036883266Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:43.036904Z", + "start_time": "2025-12-23T09:32:43.037035Z", + "end_time": "2025-12-23T09:32:43.093941Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + 
"topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:43.036551253Z", + "start_time": "2025-12-23T09:32:43.036630056Z", + "end_time": "2025-12-23T09:32:43.036790063Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:43.036572754Z", + "start_time": "2025-12-23T09:32:43.036640857Z", + "end_time": "2025-12-23T09:32:43.036698959Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4342, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/00169039da88af38387c7da51d021868fcb6f26c.json b/data/output/00169039da88af38387c7da51d021868fcb6f26c.json new file mode 100644 index 0000000..cb64860 --- /dev/null +++ b/data/output/00169039da88af38387c7da51d021868fcb6f26c.json @@ -0,0 +1,294 @@ +{ + "file_name": 
"00169039da88af38387c7da51d021868fcb6f26c.txt", + "total_words": 519, + "top_n_words": [ + { + "word": "the", + "count": 25 + }, + { + "word": "a", + "count": 20 + }, + { + "word": "was", + "count": 17 + }, + { + "word": "i", + "count": 15 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "he", + "count": 8 + }, + { + "word": "by", + "count": 7 + }, + { + "word": "copland", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "\".", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Sam Webb .", + "length": 10 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "File picture .", + "length": 14 + }, + { + "text": "But the CPS were right.", + "length": 23 + }, + { + "text": "08:20 EST, 6 November 2013 .", + "length": 28 + }, + { + "text": "07:50 EST, 6 November 2013 .", + "length": 28 + }, + { + "text": "' He added that Mr Copland was held for an hour.", + "length": 48 + }, + { + "text": "'I was cycling home in the rain after a busy day.", + "length": 49 + }, + { + "text": "It is a victory for common sense and the public purse.", + "length": 54 + }, + { + "text": "'OK, I did not have my lights on and would have admitted that.", + "length": 62 + }, + { + "text": "Police said he was taken to the police station after becoming abusive and .", + "length": 75 + }, + { + "text": "' 'It's not been a great time for me as I have just been laid off by the shop.", + "length": 78 + }, + { + "text": "I had forgotten to put the lights on my bike because the clocks had changed that weekend.", + "length": 89 + }, + { + "text": "Now the Crown Prosecution has thrown the case out in a hearing that lasted just two minutes.", + "length": 92 + }, + { + "text": "' 'I suppose they did not like it when I asked them \"haven’t you got anything better to do?", + "length": 93 + 
}, + { + "text": "'It was a mistake but I found myself being treated like some master criminal on a major drugs deal.", + "length": 99 + }, + { + "text": "' 'I do not believe it is in the public interest to continue with this prosecution and ask to withdraw it.", + "length": 106 + }, + { + "text": "' A police spokesman said the cyclist 'became abusive towards the officer and refused to give his details'.", + "length": 107 + }, + { + "text": "'I was told by one custody officer in the cells there were 25 others awaiting processing and I would be 26th.", + "length": 109 + }, + { + "text": "' 'I had reason to believe the police were over the top arresting me when a caution or fixed penalty would have done.", + "length": 117 + }, + { + "text": "' A police spokesman said: 'Mr Copland was stopped on Lytham Road in Blackpool for using a cycle without lights at night.", + "length": 121 + }, + { + "text": "then held in the cells before being charged and bailed over long-standing laws on the time of day a bicycle should have lights.", + "length": 127 + }, + { + "text": "A cyclist says he was treated like a 'burglar or child batterer' by police after he was arrested for riding his bicycle without lights.", + "length": 135 + }, + { + "text": "Mr Copland added: 'I was perhaps a bit sharp but it did not warrant me being taken off the streets and processed through the custody system.", + "length": 140 + }, + { + "text": "'I was treated like a burglar': Lytham Road in Blackpool, where Edward Copland, 38, of Blackpool, was stopped by officers as he cycled home .", + "length": 141 + }, + { + "text": "'He subsequently became abusive towards the officer and refused to give his details and he was therefore arrested and later charged with the offence.", + "length": 149 + }, + { + "text": "Edward Copland, 38, of Blackpool, Lancashire, was stopped by officers as he cycled along the pavement on his way home from working as a shop manager.", + "length": 149 + }, + { + "text": "Mr Copland 
called the arrest, which happened at about 7pm on Tuesday October 29, a 'waste of public money' adding: 'I was treated like a burglar or child batterer.", + "length": 163 + }, + { + "text": "Senior Crown Prosecutor Alison Quanbrough told magistrates at Blackpool just before Mr Copland was due to enter a plea: 'I am somewhat surprised to see this allegation in court today.", + "length": 183 + }, + { + "text": "' Justices agreed and defence lawyer Steven Duffy said: 'That is a common sense approach by my colleague - greater common sense than was shown when my client was arrested and then processed through the custody system in the cells.", + "length": 230 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5880081653594971 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:43.53694834Z", + "first_section_created": "2025-12-23T09:32:43.537244352Z", + "last_section_published": "2025-12-23T09:32:43.537427659Z", + "all_results_received": "2025-12-23T09:32:43.606487759Z", + "output_generated": "2025-12-23T09:32:43.606623964Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:43.537244352Z", + "publish_time": "2025-12-23T09:32:43.537427659Z", + "first_worker_start": "2025-12-23T09:32:43.537962781Z", + "last_worker_end": "2025-12-23T09:32:43.605584Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:43.537966581Z", + "start_time": "2025-12-23T09:32:43.538056885Z", + "end_time": "2025-12-23T09:32:43.538116187Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:43.538117Z", + "start_time": "2025-12-23T09:32:43.538253Z", + "end_time": "2025-12-23T09:32:43.605584Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:43.537896478Z", + "start_time": "2025-12-23T09:32:43.537962781Z", + "end_time": "2025-12-23T09:32:43.538072185Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:43.537914779Z", + "start_time": "2025-12-23T09:32:43.537971581Z", + "end_time": "2025-12-23T09:32:43.538015283Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2875, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/0016a2fb7a4a362e3c4e7f805f69a98cb76c1e80.json b/data/output/0016a2fb7a4a362e3c4e7f805f69a98cb76c1e80.json new file mode 100644 index 0000000..5939ffd --- /dev/null +++ 
b/data/output/0016a2fb7a4a362e3c4e7f805f69a98cb76c1e80.json @@ -0,0 +1,394 @@ +{ + "file_name": "0016a2fb7a4a362e3c4e7f805f69a98cb76c1e80.txt", + "total_words": 809, + "top_n_words": [ + { + "word": "the", + "count": 49 + }, + { + "word": "a", + "count": 24 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "nook", + "count": 19 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "b", + "count": 14 + }, + { + "word": "microsoft", + "count": 14 + }, + { + "word": "n", + "count": 14 + }, + { + "word": "and", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "A .", + "length": 3 + }, + { + "text": "It .", + "length": 4 + }, + { + "text": "Revenue .", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "7 billion.", + "length": 10 + }, + { + "text": "8 million.", + "length": 10 + }, + { + "text": "Microsoft .", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "authenticity.", + "length": 13 + }, + { + "text": "8 percent share.", + "length": 16 + }, + { + "text": "8 billion valuation.", + "length": 20 + }, + { + "text": "09:56 EST, 10 May 2013 .", + "length": 24 + }, + { + "text": "13:13 EST, 10 May 2013 .", + "length": 24 + }, + { + "text": "70 in afternoon trading.", + "length": 24 + }, + { + "text": "B\u0026N shares rose 22 percent to $21.", + "length": 34 + }, + { + "text": "The report also suggested that Nook .", + "length": 37 + }, + { + "text": "B\u0026N and Microsoft declined to comment.", + "length": 38 + }, + { + "text": "in a deal that valued the entire unit at $1.", + "length": 44 + }, + { + "text": "But Nook Media sales have disappointed since.", + "length": 45 + }, + { + "text": "made an offer to B\u0026N or whether B\u0026N had replied.", + "length": 48 + }, + { + "text": "There is already a Nook app for Windows 8 devices.", + "length": 50 + }, + { + "text": "fewer digital readers and 
tablets and had to cut prices.", + "length": 56 + }, + { + "text": "3 percent, while operating profit fell 7 percent to $115.", + "length": 57 + }, + { + "text": "The move would Microsoft in a head ion battle with Apple.", + "length": 57 + }, + { + "text": "that any deal - if one is even pending - was not imminent.", + "length": 58 + }, + { + "text": "The source said the documents dated from March, suggesting .", + "length": 60 + }, + { + "text": "In the first three quarters of the fiscal year, sales fell 0.", + "length": 61 + }, + { + "text": "B\u0026N has typically launched a new edition of the Nook every year.", + "length": 64 + }, + { + "text": "dropped 26 percent in the most recent holiday quarter as Nook sold .", + "length": 68 + }, + { + "text": "B\u0026N first indicated it might spin off the Nook business in early 2012.", + "length": 70 + }, + { + "text": "2014 in favor of distributing content via other publishers' platforms.", + "length": 70 + }, + { + "text": "source familiar with the documents cited by TechCrunch confirmed their .", + "length": 72 + }, + { + "text": "would stop selling Android-based tablets entirely by the end of fiscal .", + "length": 72 + }, + { + "text": "was not clear from the TechCrunch story whether Microsoft had formally .", + "length": 72 + }, + { + "text": "acquired its stake in the Nook Media unit a little more than a year ago .", + "length": 73 + }, + { + "text": "Microsoft is rumoured to be purchasing the business for a billion dollars .", + "length": 75 + }, + { + "text": "In December the British publisher Pearson Plc bought a stake in the unit at a $1.", + "length": 81 + }, + { + "text": "Here, Apple CEO Tim Cook introduces the new iPad during an event in San Francisco .", + "length": 83 + }, + { + "text": "The stock last traded at those levels a year ago, around the time of the Microsoft investment.", + "length": 94 + }, + { + "text": "While the college bookstore chain has provided B\u0026N with much needed cash, sales 
have been weak.", + "length": 95 + }, + { + "text": "Microsoft's new Surface tablets - the first of which hit the market last October - have only a 1.", + "length": 97 + }, + { + "text": "The Nook range of tablets, which range from Android HD tablets to e-book with 'paper' e-ink screens.", + "length": 100 + }, + { + "text": "Microsoft is believed to be considering a billion dollar offer for Barnes and Noble's Nook tablet business.", + "length": 107 + }, + { + "text": "Just this week B\u0026N slashed the price of its best tablet by one-third as a special promotion for Mother's Day.", + "length": 109 + }, + { + "text": "Companies like News Corp and Apple have started to make plays on the digital textbook market as that shift accelerates.", + "length": 119 + }, + { + "text": "Earlier this year, B\u0026N Chairman Leonard Riggio said he wanted to buy the company's chain of nearly 700 namesake bookstores.", + "length": 123 + }, + { + "text": "Microsoft's recent acquisitions - such as online chat service Skype and business networking site Yammer - have not been content-focused.", + "length": 136 + }, + { + "text": "Selling off Nook could simplify that process, especially if it reunited Riggio with the college bookstore business, which he sold to B\u0026N in 2009.", + "length": 145 + }, + { + "text": "The move would allow the giant to quickly launch a range of tablets under its Surface brand to take on Apple, Amazing and Google in the tablet wars.", + "length": 148 + }, + { + "text": "Shares in Barnes \u0026 Noble Inc soared 22 percent following the report Microsoft is considering an offer to acquire the tablet and e-book business of B\u0026N's Nook Media unit.", + "length": 169 + }, + { + "text": "Nook Media also includes a college bookstore chain, but the TechCrunch report suggested the Microsoft offer would include only the e-book, digital reader and tablet assets.", + "length": 172 + }, + { + "text": "If Microsoft carves the college bookstore chain out of the Nook 
Media unit, and B\u0026N takes back those stores, Credit Suisse said the valuation of B\u0026N could rise even higher.", + "length": 172 + }, + { + "text": "But this year it simply updated its high-definition tablets by adding Google's app store, which a number of analysts saw as an easy way for the company to launch a 'new' Nook.", + "length": 175 + }, + { + "text": "The technology website TechCrunch reported that Microsoft, which already owns a 17 percent stake in Nook Media, was proposing a $1 billion offer to buy all of Nook's digital assets.", + "length": 181 + }, + { + "text": "The latest IDC market share data for tablets, released earlier this month, leaves B\u0026N out of the industry's top five vendors, suggesting its share of the global market is negligible.", + "length": 182 + }, + { + "text": "Barclays analyst Alan Rifkin, in a note to clients on Thursday, said a lower valuation for the Nook Media unit was appropriate and that $1 billion was even higher than he had modeled.", + "length": 183 + }, + { + "text": "It was not immediately clear why Microsoft would want to buy Nook's digital assets, unless it wished to make a preemptive strike against Google and shift Nook away from the Android platform.", + "length": 190 + }, + { + "text": "The college textbook business is undergoing a transformation with the shift to digital, as publishers move away from large, heavy books that last for years to multiple packages with smaller bites of content.", + "length": 207 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5244863033294678 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:44.037647738Z", + "first_section_created": "2025-12-23T09:32:44.037986752Z", + "last_section_published": "2025-12-23T09:32:44.038214061Z", + "all_results_received": "2025-12-23T09:32:44.105610594Z", + "output_generated": "2025-12-23T09:32:44.105781501Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + 
"workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:44.037986752Z", + "publish_time": "2025-12-23T09:32:44.038214061Z", + "first_worker_start": "2025-12-23T09:32:44.038795885Z", + "last_worker_end": "2025-12-23T09:32:44.104685Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:44.038764784Z", + "start_time": "2025-12-23T09:32:44.038893289Z", + "end_time": "2025-12-23T09:32:44.039004393Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:44.03897Z", + "start_time": "2025-12-23T09:32:44.039141Z", + "end_time": "2025-12-23T09:32:44.104685Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:44.038725582Z", + "start_time": "2025-12-23T09:32:44.038809386Z", + "end_time": "2025-12-23T09:32:44.039106698Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:44.038695881Z", + "start_time": "2025-12-23T09:32:44.038795885Z", + "end_time": "2025-12-23T09:32:44.038841487Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + 
"total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4594, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/0016a36506199baae4cae50cd0758e45dddf87e7.json b/data/output/0016a36506199baae4cae50cd0758e45dddf87e7.json new file mode 100644 index 0000000..3eb59d2 --- /dev/null +++ b/data/output/0016a36506199baae4cae50cd0758e45dddf87e7.json @@ -0,0 +1,238 @@ +{ + "file_name": "0016a36506199baae4cae50cd0758e45dddf87e7.txt", + "total_words": 353, + "top_n_words": [ + { + "word": "the", + "count": 19 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "was", + "count": 9 + }, + { + "word": "a", + "count": 8 + }, + { + "word": "of", + "count": 7 + }, + { + "word": "on", + "count": 7 + }, + { + "word": "after", + "count": 6 + }, + { + "word": "been", + "count": 6 + }, + { + "word": "to", + "count": 6 + }, + { + "word": "with", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "5million.", + "length": 9 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "The pair were stopped at 1.", + "length": 27 + }, + { + "text": "25am on February 7 in Fore Street, Hertford.", + "length": 44 + }, + { + "text": "Sorry we are not currently accepting comments on this article.", + "length": 62 + }, + { + "text": "Last week she went public about Fairbairn's prostate cancer treatment.", + "length": 70 + }, + { + "text": "25am on Saturday morning, five miles from her £900,000 Broxbourne home.", + "length": 72 + }, + { + "text": "A Hertfordshire 
police spokesman told MailOnline: 'The 59-year-old was arrested at 1.", + "length": 85 + }, + { + "text": "The revelation comes just 15 months after her ex-partner Knights died from the disease.", + "length": 87 + }, + { + "text": "Her fiancée Dave Fairbairn was also arrested for a public order offence as she was breathalysed.", + "length": 97 + }, + { + "text": "The soap star was stopped in the early hours of Saturday morning, five miles from her £900,000 home .", + "length": 103 + }, + { + "text": "Soap star Gillian Taylforth has been charged with drink driving after she was arrested early on Saturday.", + "length": 105 + }, + { + "text": "Gillian Taylforth, with boyfriend David Fairbairn, has been charged with drink driving in Hertfordshire .", + "length": 105 + }, + { + "text": "'She has been charged with drink driving and bailed to return to the North \u0026 East Hertfordshire Magistrates Court on February 23.", + "length": 129 + }, + { + "text": "At the time of the court case, it was the biggest drugs raid in history after police found 839,500 ecstasy tablets worth an estimated £7.", + "length": 138 + }, + { + "text": "The couple met through friends after the breakdown of the actress' 23-year relationship with ex-fiancé Geoff Knights in 2009 and have been together since 2012.", + "length": 160 + }, + { + "text": "' The pair were stopped in Fore Street, Hertford (pictured) - Taylforth has been bailed to return to the North \u0026 East Hertfordshire Magistrates Court on February 23 .", + "length": 168 + }, + { + "text": "He was sentenced to 15 years in prison in 2003, after being found guilty of plotting to import and supply Class A drugs at Bristol Crown Court, but served just seven years of his sentence.", + "length": 188 + }, + { + "text": "' He added: 'A 59-year-old man from Cambridgeshire who was a passenger was also arrested on suspicion of a public order offence and has been issued with a fixed penalty notice and released.", + "length": 189 + }, + { + 
"text": "The actress, best known for her role as Kathy Beale in EastEnders, started dating the former stockbroker just a few years after he served time for importing one of the biggest ecstasy hauls ever seized in the UK.", + "length": 212 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5656057000160217 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:44.538977863Z", + "first_section_created": "2025-12-23T09:32:44.540717333Z", + "last_section_published": "2025-12-23T09:32:44.540934442Z", + "all_results_received": "2025-12-23T09:32:44.603308971Z", + "output_generated": "2025-12-23T09:32:44.603446477Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:44.540717333Z", + "publish_time": "2025-12-23T09:32:44.540934442Z", + "first_worker_start": "2025-12-23T09:32:44.541484565Z", + "last_worker_end": "2025-12-23T09:32:44.602432Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:44.541462264Z", + "start_time": "2025-12-23T09:32:44.541523966Z", + "end_time": "2025-12-23T09:32:44.541571168Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:44.541609Z", + "start_time": "2025-12-23T09:32:44.541765Z", + "end_time": "2025-12-23T09:32:44.602432Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:44.541463664Z", + "start_time": "2025-12-23T09:32:44.541520366Z", + "end_time": "2025-12-23T09:32:44.541578868Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:44.541423962Z", + "start_time": 
"2025-12-23T09:32:44.541484565Z", + "end_time": "2025-12-23T09:32:44.541526266Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2087, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/0016bfe904685ce40f094b07c294c2065dd3a194.json b/data/output/0016bfe904685ce40f094b07c294c2065dd3a194.json new file mode 100644 index 0000000..10d3124 --- /dev/null +++ b/data/output/0016bfe904685ce40f094b07c294c2065dd3a194.json @@ -0,0 +1,242 @@ +{ + "file_name": "0016bfe904685ce40f094b07c294c2065dd3a194.txt", + "total_words": 329, + "top_n_words": [ + { + "word": "the", + "count": 16 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "school", + "count": 7 + }, + { + "word": "were", + "count": 7 + }, + { + "word": "atlanta", + "count": 6 + }, + { + "word": "five", + "count": 6 + }, + { + "word": "high", + 
"count": 6 + }, + { + "word": "on", + "count": 6 + }, + { + "word": "police", + "count": 6 + }, + { + "word": "to", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Jessica Jerreat .", + "length": 17 + }, + { + "text": "Nearby Therrell High School was put on .", + "length": 40 + }, + { + "text": "activities were canceled as police began a manhunt.", + "length": 51 + }, + { + "text": "The gunman was still on the run by Tuesday evening.", + "length": 51 + }, + { + "text": "He is described as an African-American male in his mid to late teens.", + "length": 69 + }, + { + "text": "lock down for about an hour after the shooting, and all after-school .", + "length": 70 + }, + { + "text": "The brothers headed to a gas station where they asked a driver to call 911.", + "length": 75 + }, + { + "text": "Emergency measure: Therrell High School was put on lock down after reports of gunfire nearby .", + "length": 94 + }, + { + "text": "At this time four patients are listed as stable, one patient is still being evaluated,' it said.", + "length": 96 + }, + { + "text": "A spokesman for Grady Hospital, where the victims were taken, released a statement on Tuesday evening.", + "length": 102 + }, + { + "text": "Crime scene: Atlanta police examine the street where the shooting took place at about 4pm on Tuesday .", + "length": 102 + }, + { + "text": "Back up: A Crime Scene Investigation vehicle arrives as police began a manhunt for the young shooter .", + "length": 102 + }, + { + "text": "'Grady's Marcus Trauma Center received five patients this afternoon from a shooting in Southwest Atlanta.", + "length": 105 + }, + { + "text": "Atlanta police have not said whether they believe he was a student, or what the motive for the attack was.", + "length": 106 + }, + { + "text": "Investigation: Police cordon off the street close to Therrell High School, where a gunman shot five people .", + "length": 108 + }, + { + "text": "Four of the five 
victims were shot in the lower body, and one was hit in the chest, but all are expected to survive.", + "length": 116 + }, + { + "text": "Students were among those injured on Tuesday afternoon when a gunman opened fire near an Atlanta High School, wounding five teenagers.", + "length": 134 + }, + { + "text": "Attack: Police are looking for a gunman who injured five people, including this victim pictured, after opening fire close to an Atlanta high school .", + "length": 149 + }, + { + "text": "Four of those who were shot are students, though it is not yet clear if they were enrolled at Therrell, and their names and ages have not been released.", + "length": 152 + }, + { + "text": "Two brothers who were leaving the high school told the Atlanta Journal Constitution they saw about 20 people running away, before hearing at least five gunshots.", + "length": 161 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8469263315200806 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:45.041725845Z", + "first_section_created": "2025-12-23T09:32:45.042045358Z", + "last_section_published": "2025-12-23T09:32:45.042225365Z", + "all_results_received": "2025-12-23T09:32:45.106505571Z", + "output_generated": "2025-12-23T09:32:45.106787683Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:45.042045358Z", + "publish_time": "2025-12-23T09:32:45.042225365Z", + "first_worker_start": "2025-12-23T09:32:45.042820489Z", + "last_worker_end": "2025-12-23T09:32:45.104714Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:45.042792488Z", + "start_time": "2025-12-23T09:32:45.04282999Z", + "end_time": "2025-12-23T09:32:45.042859891Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:45.04304Z", + "start_time": "2025-12-23T09:32:45.043164Z", + "end_time": "2025-12-23T09:32:45.104714Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:45.042769187Z", + "start_time": "2025-12-23T09:32:45.042820489Z", + "end_time": "2025-12-23T09:32:45.042865091Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:45.04282969Z", + "start_time": "2025-12-23T09:32:45.042896492Z", + "end_time": "2025-12-23T09:32:45.042926194Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1924, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git 
a/data/output/001732b374f362d3961a510da315601e4b5e7e84.json b/data/output/001732b374f362d3961a510da315601e4b5e7e84.json new file mode 100644 index 0000000..a4b7097 --- /dev/null +++ b/data/output/001732b374f362d3961a510da315601e4b5e7e84.json @@ -0,0 +1,416 @@ +{ + "file_name": "001732b374f362d3961a510da315601e4b5e7e84.txt", + "total_words": 1226, + "top_n_words": [ + { + "word": "the", + "count": 85 + }, + { + "word": "to", + "count": 36 + }, + { + "word": "and", + "count": 29 + }, + { + "word": "in", + "count": 25 + }, + { + "word": "of", + "count": 25 + }, + { + "word": "irs", + "count": 22 + }, + { + "word": "that", + "count": 20 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "said", + "count": 19 + }, + { + "word": "tax", + "count": 19 + } + ], + "sorted_sentences": [ + { + "text": "2 billion in fraud.", + "length": 19 + }, + { + "text": "Russell George said Tuesday.", + "length": 28 + }, + { + "text": "5 billion were related to identity theft.", + "length": 41 + }, + { + "text": "3 billion in potentially fraudulent tax returns.", + "length": 48 + }, + { + "text": "5 million undetected tax returns with more than $5.", + "length": 51 + }, + { + "text": "In its investigation, George said, auditors found another 1.", + "length": 60 + }, + { + "text": "In addition, thieves commonly get the refunds put on debit cards.", + "length": 65 + }, + { + "text": "These instances result in the greatest burden to the legitimate taxpayer.", + "length": 73 + }, + { + "text": "\" As of last month, the IRS reported that it had stopped the issuance of $1.", + "length": 76 + }, + { + "text": "\"I cannot tell you that we will beat this problem in one year,\" Miller said.", + "length": 76 + }, + { + "text": "\"As a result, the tax return is processed, and the fraudulent refund is issued.", + "length": 79 + }, + { + "text": "\" Making the problem worse, George said, the IRS is hampered by limited resources.", + "length": 82 + }, + { + "text": "2 million tax returns it found 
to be fraudulent, about 940,000 returns totaling $6.", + "length": 83 + }, + { + "text": "The IRS will only select those tax returns that it can verify based on its resources.", + "length": 85 + }, + { + "text": "\"However, we have improved, and we are committed to continuing to improve our programs.", + "length": 87 + }, + { + "text": "Then, they file an online tax return using the real taxpayer's name and a fictitious income.", + "length": 92 + }, + { + "text": "His projection of $26 billion is larger than any other estimate of identity theft tax fraud.", + "length": 92 + }, + { + "text": "Last year, according to the Treasury Inspector General's Office, the IRS reported that of the 2.", + "length": 96 + }, + { + "text": "\"It also has not been able to quantify the amount of improper payments resulting from identity theft.", + "length": 101 + }, + { + "text": "George said more should be done to ensure that fraudulent tax returns are not deposited into bank accounts.", + "length": 107 + }, + { + "text": "IRS can issue the refund on that card, although some thieves have also gotten their returns on actual Treasury checks.", + "length": 118 + }, + { + "text": "The thieves know that the IRS does not verify the employer W-2s sent with the return until after the refund is issued.", + "length": 118 + }, + { + "text": "In a statement issued following George's testimony, the IRS said it \"believes that the five-year estimate is far too high.", + "length": 122 + }, + { + "text": "George's report is the first detailed analysis of the tax refund fraud problem, which could affect any legitimate taxpayer.", + "length": 123 + }, + { + "text": "\" In the past four years, he said, the IRS has identified more than 490,000 taxpayers who are the victims of identity theft.", + "length": 124 + }, + { + "text": "The individuals whose identities were stolen may not even be aware that their identities were used to file a fraudulent tax return.", + "length": 131 + }, + { + "text": "It has 
also issued special personal identification numbers, or PINs, to identity theft victims when they are filing future returns.", + "length": 131 + }, + { + "text": "\" In testimony prepared for Tuesday's hearing, Deputy IRS Commissioner Steven Miller said the agency cannot stop all identity theft.", + "length": 132 + }, + { + "text": "\"Even with improved identification of these returns, the next step of verifying whether the returns are fraudulent will require resources,\" he said.", + "length": 148 + }, + { + "text": "Without the necessary resources, it is unlikely that the IRS will be able to work the entire inventory of potentially fraudulent returns it identifies.", + "length": 151 + }, + { + "text": "\" In an investigation into tax refund fraud, CNN reported in March that criminals have purchased luxury cars, jewelry and plastic surgery with the money.", + "length": 153 + }, + { + "text": "\"When the identity thief files the fraudulent tax return, the IRS does not yet know that the individual's identity will be used more than once,\" George said.", + "length": 157 + }, + { + "text": "\"Without the falsely reported income, many of the deductions and/or credits used to inflate the fraudulent tax refund could not be claimed on the tax return.", + "length": 157 + }, + { + "text": "\"The primary characteristic of these cases is that the identity thief reports false income and withholding to generate a fraudulent tax return,\" George said.", + "length": 157 + }, + { + "text": "\"The IRS acknowledges that it does not know the exact number of identity theft incidents or the number of taxpayers affected by identity theft,\" George said.", + "length": 157 + }, + { + "text": "The agency also started a pilot program in April to help local law enforcement in obtaining tax return information related to ongoing criminal investigations.", + "length": 158 + }, + { + "text": "The IRS says it determined these returns were potentially fraudulent through a sampling of returns, and 
it does not believe any legitimate returns were included.", + "length": 161 + }, + { + "text": "\"The IRS has faced budget cuts, a hiring freeze and staffing reductions during the same time it has encountered a significant surge in identity theft refund fraud.", + "length": 163 + }, + { + "text": "\"Various new identity theft filters are in place to improve our ability to spot false returns before they are processed and before a refund is issued,\" Miller said.", + "length": 164 + }, + { + "text": "To make matters worse, the IRS is not effectively helping the victims of identity theft, George said, adding that it can take more than a year to resolve these cases.", + "length": 166 + }, + { + "text": "\" \"The estimate was based on 2010 figures, which took place before the IRS instituted major changes with the way it handles identity theft cases,\" the IRS statement read.", + "length": 170 + }, + { + "text": "First, thieves obtain Social Security numbers and other personal information from insiders at hospitals, doctor's offices, car dealerships or anywhere the information is stored.", + "length": 177 + }, + { + "text": "\" Those efforts, according to the IRS, have lead to \"stopping more refund fraud than ever before\" and \"are not reflected in the five-year projection\" by the Treasury inspector general.", + "length": 184 + }, + { + "text": "\"Our expanded screening on issues such as W-2 matching, Schedule C information, interest income and Social Security income have had a major impact on our ability to reduce identity theft fraud.", + "length": 193 + }, + { + "text": "The IRS has issued special identification numbers to taxpayers whose identities have been stolen and clamped down on abuses in filing returns under deceased taxpayers' identities and prisoners.", + "length": 193 + }, + { + "text": "\" At the same time, Miller said, there is a \"delicate balance\" in the \"need to make payments in a timely manner with the need to ensure that claims are proper and 
taxpayer rights are protected.", + "length": 193 + }, + { + "text": "\"I can tell you that we have committed our talents and resources to prevent the issuance of fraudulent returns and have developed processes to minimize the pain felt by those who have been victimized.", + "length": 200 + }, + { + "text": "\" The scope of the problem is illustrated by what George said his auditors found for tax year 2010, in which 48,357 Social Security numbers were used multiple times as a primary taxpayer identification number.", + "length": 209 + }, + { + "text": "\"Direct deposits should not be made to debit cards issued by financial institutions and debit card administration companies that do not take sufficient steps to authenticate individuals' identities,\" George said.", + "length": 212 + }, + { + "text": "We can and will continue to work to prevent the issuance of fraudulent refunds, and we can and will continue to work with innocent taxpayers to clear their accounts and/or get them money faster in a courteous and professional manner.", + "length": 233 + }, + { + "text": "But in testimony before Congress last year, National Taxpayer Advocate Nina Olson said those filters \"inevitably block large numbers of proper refund claims\" since there \"is no easy way to distinguish proper claims from improper ones.", + "length": 234 + }, + { + "text": "(CNN) -- Criminals who file fraudulent tax returns by stealing people's identities could rake in an estimated $26 billion over the next five years because the IRS cannot keep up with the amount of the fraud, Treasury Inspector General J.", + "length": 237 + }, + { + "text": "The IRS maintains it has certain filters in place at the start of the tax filing season to prevent and detect identity theft and fraud, and it says it has recently trained additional employees across the country to deal with the problem.", + "length": 237 + }, + { + "text": "\"Our analysis found that, although the IRS detects and prevents a large number of 
fraudulent refunds based on false income documents, there is much fraud that it does not detect,\" said George's prepared testimony before a joint hearing of the House Ways and Means Subcommittees on Oversight and Social Security.", + "length": 311 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7293168604373932 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:45.543005467Z", + "first_section_created": "2025-12-23T09:32:45.544598632Z", + "last_section_published": "2025-12-23T09:32:45.544986348Z", + "all_results_received": "2025-12-23T09:32:45.631189342Z", + "output_generated": "2025-12-23T09:32:45.631444553Z", + "total_processing_time_ms": 88, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 86, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:45.544598632Z", + "publish_time": "2025-12-23T09:32:45.544861443Z", + "first_worker_start": "2025-12-23T09:32:45.545398964Z", + "last_worker_end": "2025-12-23T09:32:45.630196Z", + "total_journey_time_ms": 85, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:45.545427166Z", + "start_time": "2025-12-23T09:32:45.545508669Z", + "end_time": "2025-12-23T09:32:45.545603073Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:45.545571Z", + "start_time": "2025-12-23T09:32:45.545711Z", + "end_time": "2025-12-23T09:32:45.630196Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 84 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:45.545401164Z", + "start_time": "2025-12-23T09:32:45.545482968Z", + "end_time": "2025-12-23T09:32:45.545567971Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:45.545314061Z", + "start_time": 
"2025-12-23T09:32:45.545398964Z", + "end_time": "2025-12-23T09:32:45.545445666Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:45.544907344Z", + "publish_time": "2025-12-23T09:32:45.544986348Z", + "first_worker_start": "2025-12-23T09:32:45.545423965Z", + "last_worker_end": "2025-12-23T09:32:45.616593Z", + "total_journey_time_ms": 71, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:45.545473567Z", + "start_time": "2025-12-23T09:32:45.545518569Z", + "end_time": "2025-12-23T09:32:45.545568671Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:45.54572Z", + "start_time": "2025-12-23T09:32:45.545863Z", + "end_time": "2025-12-23T09:32:45.616593Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 70 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:45.545439866Z", + "start_time": "2025-12-23T09:32:45.545484768Z", + "end_time": "2025-12-23T09:32:45.545641974Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:45.545371663Z", + "start_time": "2025-12-23T09:32:45.545423965Z", + "end_time": "2025-12-23T09:32:45.545444466Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 154, + "min_processing_ms": 70, + "max_processing_ms": 84, + "avg_processing_ms": 77, + "median_processing_ms": 84, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3704, + "slowest_section_id": 0, + "slowest_section_time_ms": 85 + } +} diff --git a/data/output/001789cf9b865dcac3d9fc032a6b1533e3318eda.json b/data/output/001789cf9b865dcac3d9fc032a6b1533e3318eda.json new file mode 100644 index 0000000..354d647 --- /dev/null +++ b/data/output/001789cf9b865dcac3d9fc032a6b1533e3318eda.json @@ -0,0 +1,266 @@ +{ + "file_name": "001789cf9b865dcac3d9fc032a6b1533e3318eda.txt", + "total_words": 511, + "top_n_words": [ + { + "word": "the", + "count": 44 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "of", + "count": 19 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "video", + "count": 10 + }, + { + "word": "as", + "count": 8 + }, + { + "word": "for", + "count": 8 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "mexico", + "count": 8 + }, + { + "word": "that", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "There's a smog alert.", + "length": 21 + }, + { + "text": "Protesters clash with riot police.", + "length": 34 + }, + { + "text": "Stop superficially fixing the country.", + "length": 38 + }, + { + "text": "Stop working for your party, and not for us.", + "length": 44 + }, + { + "text": "\" Some of the candidates have watched the video.", + "length": 48 + }, + { + "text": "The robbers hand their loot over to a corrupt police officer.", + "length": 61 + }, + { + "text": "CNN's Krupskaia Alis contributed to 
this report from Mexico City.", + "length": 65 + }, + { + "text": "The video has garnered nearly 2 million views in less than four days.", + "length": 69 + }, + { + "text": "\"We've had the response we've hoped for, and even exceeded it,\" Martinez said.", + "length": 78 + }, + { + "text": "Some viewers have criticized it as political manipulation, others as a wake-up call.", + "length": 84 + }, + { + "text": "Seeing children act out these grown-up situations has elicited a number of responses.", + "length": 85 + }, + { + "text": "A child dressed as a businessman gets robbed at gunpoint as soon as he leaves his home.", + "length": 87 + }, + { + "text": "But the list of Mexico's woes weighs heavily: security, pollution and poverty, among others.", + "length": 92 + }, + { + "text": "Our Mexico of the Future will release data based on the millions of messages it has received.", + "length": 93 + }, + { + "text": "Drug traffickers have it out with police on the streets, and human smugglers unload their cargo.", + "length": 96 + }, + { + "text": "\" A young girl with brown eyes and long brown hair, wearing a simple white shirt, poses the question.", + "length": 101 + }, + { + "text": "Martinez said to expect that security and the environment are the two most popular themes mentioned by Mexicans.", + "length": 112 + }, + { + "text": "Behind her is a small army of child actors who star in the video, which is cute for a moment, but deadly serious.", + "length": 113 + }, + { + "text": "So far, more than 10 million Mexicans have written or recorded their dreams for a safer or cleaner or more tolerant Mexico.", + "length": 123 + }, + { + "text": "In it, the children act out a day in the life of a Mexican resident, fraught with all the problems and challenges that a leader must face.", + "length": 138 + }, + { + "text": "Leftist candidate Lopez Obrador, of the Party of the Democratic Revolution, had not seen the video, but said he agreed with the theme of change.", + "length": 
144 + }, + { + "text": "The goal of the organization is to collect as many \"visions\" of Mexico's future as possible and to compile them in a book that will be presented to the candidates before the election.", + "length": 183 + }, + { + "text": "\"In reality, the video is not reflecting anything that people have not experienced,\" said Rosenda Martinez, a spokeswoman for Our Mexico of the Future, the group behind the production.", + "length": 184 + }, + { + "text": "The video targets the major presidential candidates -- Enrique Pena Nieto, Josefina Vazquez Mota and Andres Manuel Lopez Obrador -- and aims to raise awareness about Our Mexico of the Future.", + "length": 191 + }, + { + "text": "After the children in the video depict a kidnapping, posters of the missing and people wearing face masks because of smog, the young narrator continues: \"If this is the future that awaits me, I don't want it.", + "length": 208 + }, + { + "text": "(CNN) -- A short video that has gone viral in Mexico asks a tough question of the country's presidential candidates: \"Are you striving only for the (presidential) chair, or will you change the future of our country?", + "length": 215 + }, + { + "text": "Vazquez Mota, of the ruling National Action Party, said the video's message can't go unnoticed, while Institutional Revolutionary Party candidate Pena Nieto expressed that now is the time for change, as the video suggests.", + "length": 222 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4804544448852539 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:46.045148625Z", + "first_section_created": "2025-12-23T09:32:46.045508439Z", + "last_section_published": "2025-12-23T09:32:46.045692847Z", + "all_results_received": "2025-12-23T09:32:46.111432012Z", + "output_generated": "2025-12-23T09:32:46.111596519Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + 
"aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:46.045508439Z", + "publish_time": "2025-12-23T09:32:46.045692847Z", + "first_worker_start": "2025-12-23T09:32:46.046243869Z", + "last_worker_end": "2025-12-23T09:32:46.110495Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:46.046317672Z", + "start_time": "2025-12-23T09:32:46.046430877Z", + "end_time": "2025-12-23T09:32:46.04650708Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:46.046479Z", + "start_time": "2025-12-23T09:32:46.046622Z", + "end_time": "2025-12-23T09:32:46.110495Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:46.046284571Z", + "start_time": "2025-12-23T09:32:46.046362174Z", + "end_time": "2025-12-23T09:32:46.046451278Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:46.046186067Z", + "start_time": "2025-12-23T09:32:46.046243869Z", + "end_time": "2025-12-23T09:32:46.046281371Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2953, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/0017b6fcb795640d05b89be7b5b5e2e13168083f.json b/data/output/0017b6fcb795640d05b89be7b5b5e2e13168083f.json new file mode 100644 index 0000000..1a40a79 --- /dev/null +++ b/data/output/0017b6fcb795640d05b89be7b5b5e2e13168083f.json @@ -0,0 +1,420 @@ +{ + "file_name": "0017b6fcb795640d05b89be7b5b5e2e13168083f.txt", + "total_words": 820, + "top_n_words": [ + { + "word": "the", + "count": 45 + }, + { + "word": "and", + "count": 26 + }, + { + "word": "000", + "count": 21 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "tuna", + "count": 12 + }, + { + "word": "are", + "count": 11 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "to", + "count": 11 + }, + { + "word": "with", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "com.", + "length": 4 + }, + { + "text": "Org .", + "length": 5 + }, + { + "text": "Adam .", + "length": 6 + }, + { + "text": "fisheries.", + "length": 10 + }, + { + "text": "75 million!", + "length": 11 + }, + { + "text": "- WorldWildlife.", + "length": 16 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "com, which teamed up with .", + "length": 27 + }, + { + "text": "- Ermitage, Vin de Paille, M.", + "length": 29 + }, + { + "text": "The Valentine’s Day dinner was .", + "length": 34 + }, + { + "text": "Adam Simmonds and 
GreatBritishChefs.", + "length": 36 + }, + { + "text": "food and the perfect mood for romance.", + "length": 38 + }, + { + "text": "Chapoutier, 1997 - Half Bottle - £100 .", + "length": 40 + }, + { + "text": "And the fish are disappearing as a result.", + "length": 42 + }, + { + "text": "unveiled by the luxury website VeryFirstTo.", + "length": 43 + }, + { + "text": "- Chateau D’Yquem, 1975 - Bottle - £400 .", + "length": 44 + }, + { + "text": "- Salon Blanc de Blancs, 1959 - Bottle - £3,600 .", + "length": 50 + }, + { + "text": "- Salon Blanc de Blancs, 1976 - Magnum - £3,000 .", + "length": 50 + }, + { + "text": "- Quinta do Noval, Nacional, 1966 - Bottle - £1,400 .", + "length": 54 + }, + { + "text": "Bluefin are the largest tuna and can live up to 40 years.", + "length": 57 + }, + { + "text": "They migrate across oceans and can dive more than 4,000 feet.", + "length": 61 + }, + { + "text": "- Chateauneuf-du-Pape, Chateau Rayas, 1990 Bottle - £1,200 .", + "length": 61 + }, + { + "text": "In total, the wine bill alone costs a wallet-busting £30,700.", + "length": 62 + }, + { + "text": "They hunt by sight and have the sharpest vision of any bony fish.", + "length": 65 + }, + { + "text": "- Le Montrachet, Domaine de la Romanee-Conti, 1991 - Bottle - £4,000 .", + "length": 71 + }, + { + "text": "said 'Each dish has been created with romance in mind and to allow the .", + "length": 72 + }, + { + "text": "spectacular ingredients to work their magic, in both creating delicious .", + "length": 73 + }, + { + "text": "It also features bluefin tuna, which some wildlife campaigners claim are .", + "length": 74 + }, + { + "text": "- La Romanee-Conti, Domaine de la Romanee-Conti, 1990 - Bottle - £17,000 .", + "length": 75 + }, + { + "text": "The finale of the meal is vanilla and smoked chocolate with kopi luwak ice cream.", + "length": 81 + }, + { + "text": "Workers harvesting bluefin tuna from Maricultura's tuna pens near Ensenada, Mexico .", + "length": 84 + }, + { + 
"text": "Driven by such high prices, fishermen use even more refined techniques to catch tuna.", + "length": 85 + }, + { + "text": "Although tuna do provide food and livelihoods for people, they are more than just seafood.", + "length": 90 + }, + { + "text": "In the Mediterranean, WWF has been working for over 10 years to protect Atlantic bluefin tuna.", + "length": 94 + }, + { + "text": "We work to stop overfishing and ensure recovery of the Eastern Atlantic and Mediterranean stock.", + "length": 96 + }, + { + "text": "Tuna are a top predator in the marine food chain, maintaining a balance in the ocean environment.", + "length": 97 + }, + { + "text": "A number of the ingredients, including watermelon, are supposed to share similarities with Viagra.", + "length": 98 + }, + { + "text": "There are three species of bluefin: Atlantic (the largest and most endangered), Pacific, and Southern.", + "length": 102 + }, + { + "text": "Michelin starred chef Adam Simmonds came up with the eight-course menu for a ‘no expense-spared’ romantic.", + "length": 110 + }, + { + "text": "The meal also comes with confit foie gras, smoked eel, rissotto with carbineros prawns, saffron and silver leaf.", + "length": 112 + }, + { + "text": "The most expensive Valentine's Day menu ever was unveiled today featuring a stomach-churning price tag of £61,000 .", + "length": 116 + }, + { + "text": "Bluefin tuna are made for speed: built like torpedoes, have retractable fins and their eyes are set flush to their body.", + "length": 120 + }, + { + "text": "The Atlantic bluefin is a highly sought-after delicacy for sushi and sashimi in Asia—a single fish has sold for over $1.", + "length": 122 + }, + { + "text": "They are tremendous predators from the moment they hatch, seeking out schools of fish like herring, mackerel and even eels.", + "length": 123 + }, + { + "text": "And these aphrodisiacs may come in handy when it comes to the extensive drinks menu, which come courtesy of Corney \u0026 Barrow.", 
+ "length": 124 + }, + { + "text": "To wash the food down, there is a £17,000 bottle of La Romanee-Conti wine and a magnum of 1976 Salon Blanc de Blancs worth £3,000.", + "length": 132 + }, + { + "text": "The world's most expensive Valentine’s Day menu has been unveiled today, with the extravagant dinner costing a stomach-churning £61,000.", + "length": 139 + }, + { + "text": "Most catches of the Atlantic bluefin tuna are taken from the Mediterranean Sea, which is the most important bluefin tuna fishery in the world.", + "length": 142 + }, + { + "text": "Marcel Knobil, founder of VeryFirstTo, added: 'The menu awakes all the senses and the luxury element further intensifies the pleasure of this meal.", + "length": 147 + }, + { + "text": "It comes with almus white caviar worth £3,000, Natives South Sea oysters including pearls at a cost of £6,000, £2,000 worth of gold leaf and truffles costing £1,000.", + "length": 169 + }, + { + "text": "as close to extinction as the panda or white rhino, but an alternative course of Pickled Mackerel, Cucumber, Horseradish and Charcoal is available should the customer have sensitivities around the fish.", + "length": 202 + }, + { + "text": "' Chef Adam Simmonds designed the eight-course menu, that includes almus white caviar worth £3,000, South Sea pearls (right) at a cost of £6,000, £2,000 worth of gold leaf and truffles costing £1,000 .", + "length": 205 + }, + { + "text": "'VeryFirstTo will also be enhancing the experience by covering the table in rose petals; releasing a dozen white doves, lighting 50 rose scented candles, and entertaining the couple with a harpist and a romantic poet.", + "length": 217 + }, + { + "text": "' - Almus White Caviar - £3,000 - Pata Negra Iberico jambon de Bellotta - £180- Spring white truffle - £1,000- Pacific Blue Fin Tuna - £120- Goose Foie Gras - £120- English Saffron - £200- Carabineros Prawns - £120- Gold Leaf - £2,000- Silver Leaf - £800- Wagyu Beef - £200- Truffled Brillat Savarin - £80- 
Perigord Truffle - £500- Mast Brothers Ltd Reserve Vanilla and Smoked Chocolate - £150 - Kopi Luwak Coffee - £350 - Amethyst Bamboo salt - £2,000- Flora Danica China - £2,400- Native South Sea oysters including pearls -£6,000- Dry Ice - £200 .", + "length": 568 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6621213257312775 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:46.546452348Z", + "first_section_created": "2025-12-23T09:32:46.546766361Z", + "last_section_published": "2025-12-23T09:32:46.547132876Z", + "all_results_received": "2025-12-23T09:32:46.628194762Z", + "output_generated": "2025-12-23T09:32:46.62837827Z", + "total_processing_time_ms": 81, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 81, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:46.546766361Z", + "publish_time": "2025-12-23T09:32:46.547010071Z", + "first_worker_start": "2025-12-23T09:32:46.547498091Z", + "last_worker_end": "2025-12-23T09:32:46.627328Z", + "total_journey_time_ms": 80, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:46.547423388Z", + "start_time": "2025-12-23T09:32:46.547498091Z", + "end_time": "2025-12-23T09:32:46.547615895Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:46.547776Z", + "start_time": "2025-12-23T09:32:46.547982Z", + "end_time": "2025-12-23T09:32:46.627328Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 79 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:46.547505991Z", + "start_time": "2025-12-23T09:32:46.547605195Z", + "end_time": "2025-12-23T09:32:46.547754601Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:46.54747409Z", + 
"start_time": "2025-12-23T09:32:46.547536492Z", + "end_time": "2025-12-23T09:32:46.547584594Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:46.547035272Z", + "publish_time": "2025-12-23T09:32:46.547132876Z", + "first_worker_start": "2025-12-23T09:32:46.54748909Z", + "last_worker_end": "2025-12-23T09:32:46.58024Z", + "total_journey_time_ms": 33, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:46.547556193Z", + "start_time": "2025-12-23T09:32:46.547594094Z", + "end_time": "2025-12-23T09:32:46.547599195Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:46.54785Z", + "start_time": "2025-12-23T09:32:46.547996Z", + "end_time": "2025-12-23T09:32:46.58024Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 32 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:46.547564293Z", + "start_time": "2025-12-23T09:32:46.547608695Z", + "end_time": "2025-12-23T09:32:46.547611495Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:46.547449889Z", + "start_time": "2025-12-23T09:32:46.54748909Z", + "end_time": "2025-12-23T09:32:46.54749029Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 111, + "min_processing_ms": 32, + "max_processing_ms": 79, + "avg_processing_ms": 55, + "median_processing_ms": 79, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2512, + "slowest_section_id": 0, + "slowest_section_time_ms": 80 + } +} diff --git a/data/output/0017d4562fa7de28a7654e89d93a656fc08faefd.json b/data/output/0017d4562fa7de28a7654e89d93a656fc08faefd.json new file mode 100644 index 0000000..08bc93b --- /dev/null +++ b/data/output/0017d4562fa7de28a7654e89d93a656fc08faefd.json @@ -0,0 +1,226 @@ +{ + "file_name": "0017d4562fa7de28a7654e89d93a656fc08faefd.txt", + "total_words": 326, + "top_n_words": [ + { + "word": "s", + "count": 11 + }, + { + "word": "career", + "count": 9 + }, + { + "word": "his", + "count": 9 + }, + { + "word": "messi", + "count": 9 + }, + { + "word": "newell", + "count": 9 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "the", + "count": 8 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "old", + "count": 7 + }, + { + "word": "argentina", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "Age: 27 .", + "length": 9 + }, + { + "text": "Career goals: 403 .", + "length": 19 + }, + { + "text": "Nationality: Argentinian .", + "length": 26 + }, + { + "text": "VIDEO All Star XI: Lionel Messi highlights .", + "length": 44 + }, + { + "text": "'But the last two years I spent with Newell's Old Boys were the best of my life.", + "length": 80 + }, + { + "text": "VIDEO Scroll down to watch 8 year old Messi scoring sensational lob for Newell's Old Boys .", + "length": 91 + }, + { + "text": "Lionel Messi 
celebrates adding to his impressive goal tally against Rayo Vallecano on the weekend .", + "length": 99 + }, + { + "text": "Messi celebrates winning the Champions League in 2008 - one of three European triumphs with Barcelona .", + "length": 103 + }, + { + "text": "'I don't see him wearing any other shirt in Argentina than that of Newell's and he will retire at Newell's.", + "length": 107 + }, + { + "text": "Heinze, in Argentina training with Messi, also ended his career back in South America with Newell's Old Boys .", + "length": 110 + }, + { + "text": "' Former Argentina team-mate Gabriel Heinze (right) believes Messi will go to his homeland to finish his career .", + "length": 113 + }, + { + "text": "The Barcelona star began his career in the South American club's youth side, making five appearances, before heading to the Nou Camp aged 12.", + "length": 141 + }, + { + "text": "Lionel Messi will finish his stunning career back in his homeland with Newell's Old Boys according to former Argentina team-mate Gabriel Heinze.", + "length": 144 + }, + { + "text": "' Heinze also ended his career at Newell's Old Boys and added: 'I had played very few games in Argentina and never thought of going back there to finish my career.", + "length": 163 + }, + { + "text": "Messi has surpassed 400 career goals with Argentina and Barcelona during a trophy-laden career but Heinze believes his heart is set on a return home in the future.", + "length": 163 + }, + { + "text": "Personal honours: 4 FIFA Ballon d’Or awards (2009-12) Former Manchester United, Real Madrid and PSG defender Heinze told Marca: 'Messi will retire at Newell's, there's no need to convince him of anything.", + "length": 207 + }, + { + "text": "Clubs: Newell’s Old Boys (Youth ranks 1995-2000), Barcelona (2000-present) Major honours: 6 La Liga titles (2004-05, 2005-06, 2008-09, 2009-10, 2010-11, 2012-13) 3 Champions Leagues (2005-06, 2008-09, 2010-11), 2 Spanish Cups.", + "length": 229 + } + ], + "sentiment": { + 
"sentiment": "positive", + "score": 0.7135595679283142 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:47.048232791Z", + "first_section_created": "2025-12-23T09:32:47.050328476Z", + "last_section_published": "2025-12-23T09:32:47.050507283Z", + "all_results_received": "2025-12-23T09:32:47.108836348Z", + "output_generated": "2025-12-23T09:32:47.108969953Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:47.050328476Z", + "publish_time": "2025-12-23T09:32:47.050507283Z", + "first_worker_start": "2025-12-23T09:32:47.051079006Z", + "last_worker_end": "2025-12-23T09:32:47.107998Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:47.051082906Z", + "start_time": "2025-12-23T09:32:47.05116141Z", + "end_time": "2025-12-23T09:32:47.051201611Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:47.051255Z", + "start_time": "2025-12-23T09:32:47.051392Z", + "end_time": "2025-12-23T09:32:47.107998Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:47.051064706Z", + "start_time": "2025-12-23T09:32:47.051141809Z", + "end_time": "2025-12-23T09:32:47.051196111Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:47.051026904Z", + "start_time": "2025-12-23T09:32:47.051079006Z", + "end_time": "2025-12-23T09:32:47.051098507Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1862, + "slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/0018963c6bfd060dbbbd6633a5655712d90643fb.json b/data/output/0018963c6bfd060dbbbd6633a5655712d90643fb.json new file mode 100644 index 0000000..2f64f16 --- /dev/null +++ b/data/output/0018963c6bfd060dbbbd6633a5655712d90643fb.json @@ -0,0 +1,652 @@ +{ + "file_name": "0018963c6bfd060dbbbd6633a5655712d90643fb.txt", + "total_words": 1281, + "top_n_words": [ + { + "word": "the", + "count": 109 + }, + { + "word": "and", + "count": 43 + }, + { + "word": "of", + "count": 40 + }, + { + "word": "to", + "count": 39 + }, + { + "word": "a", + "count": 26 + }, + { + "word": "in", + "count": 26 + }, + { + "word": "is", + "count": 20 + }, + { + "word": "it", + "count": 18 + }, + { + "word": "s", + "count": 18 + }, + { + "word": "has", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "month.", + "length": 6 + }, + { 
+ "text": "Cutsem.", + "length": 7 + }, + { + "text": "village.", + "length": 8 + }, + { + "text": "cottage.", + "length": 8 + }, + { + "text": "grounds.", + "length": 8 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "It's got lovely .", + "length": 17 + }, + { + "text": "Rebecca English .", + "length": 17 + }, + { + "text": "'It's nice inside.", + "length": 18 + }, + { + "text": "go on in the village.", + "length": 21 + }, + { + "text": "Rescue helicopter pilot.", + "length": 24 + }, + { + "text": "It's got quite a lot of .", + "length": 25 + }, + { + "text": "Queen's Sandringham House .", + "length": 27 + }, + { + "text": "years old when then move in.", + "length": 28 + }, + { + "text": "02:46 EST, 11 January 2013 .", + "length": 28 + }, + { + "text": "06:58 EST, 10 January 2013 .", + "length": 28 + }, + { + "text": "made to shroud it in secrecy.", + "length": 29 + }, + { + "text": "isolated but surrounding the .", + "length": 30 + }, + { + "text": "Anmer Hall is one of the 150 .", + "length": 30 + }, + { + "text": "The club is home to darts and .", + "length": 31 + }, + { + "text": "Residents in the tiny village of .", + "length": 34 + }, + { + "text": "listed on the council’s website.", + "length": 34 + }, + { + "text": "Earlier this week it was reported .", + "length": 35 + }, + { + "text": "However later this year they will .", + "length": 35 + }, + { + "text": "It's beautiful here all year round.", + "length": 35 + }, + { + "text": "proposition to the Duke and Duchess.", + "length": 36 + }, + { + "text": "The village of Anmer has no pub or .", + "length": 36 + }, + { + "text": "'Anyone who wants to see the plans .", + "length": 36 + }, + { + "text": "At the moment the main entrance to .", + "length": 36 + }, + { + "text": "When they are not on Anglesey, they .", + "length": 37 + }, + { + "text": "A spokeswoman for King’s Lynn and .", + "length": 37 + }, + { + "text": "Anmer Hall which 
largely dates back .", + "length": 37 + }, + { + "text": "The current tenant is James Everett, .", + "length": 38 + }, + { + "text": "A decision on whether to approve the .", + "length": 38 + }, + { + "text": "Although much of the land is open to .", + "length": 38 + }, + { + "text": "Villager Neville Warnes, 82, who has .", + "length": 38 + }, + { + "text": "The application was submitted by the .", + "length": 38 + }, + { + "text": "News of the new planning application .", + "length": 38 + }, + { + "text": "'Everybody gets on with everybody and .", + "length": 39 + }, + { + "text": "And anyone who wants to see the plans .", + "length": 39 + }, + { + "text": "frontage, facing south, and it's homely.", + "length": 40 + }, + { + "text": "' William and Kate currently live in a .", + "length": 40 + }, + { + "text": "Buckingham Palace has declined to comment.", + "length": 42 + }, + { + "text": "year, but details were only revealed today.", + "length": 43 + }, + { + "text": "and is only two miles east of Sandringham House.", + "length": 48 + }, + { + "text": "council offices in the presence of a planning officer.", + "length": 54 + }, + { + "text": "council offices in the presence of a planning officer.", + "length": 54 + }, + { + "text": "sometime before March 11,  instead of local councillors.", + "length": 57 + }, + { + "text": "That's an ideal place for them to live and rear a family.", + "length": 57 + }, + { + "text": "made and detailed drawings of the plans have been submitted.", + "length": 60 + }, + { + "text": "which has a Latin motto translating as 'Never make me sail'.", + "length": 60 + }, + { + "text": "The couple are currently based in Kensington Palace, West London .", + "length": 66 + }, + { + "text": "Camilla Parker Bowles, now the Duchess of Cornwall, used to enjoy .", + "length": 67 + }, + { + "text": "has to be vetted by the police first and can only view them at the .", + "length": 68 + }, + { + "text": "plans is set to be made by council officers 
using delegated powers .", + "length": 68 + }, + { + "text": "social club which is open on Friday evenings and Saturday lunchtimes.", + "length": 69 + }, + { + "text": "She added: 'I would not move back to a town now I live here in Anmer.", + "length": 69 + }, + { + "text": "rented farmhouse on Anglesey, where the prince is an RAF Search and .", + "length": 69 + }, + { + "text": "secret trysts, when it was rented by the prince’s friend Hugh van .", + "length": 69 + }, + { + "text": "lived in Anmer for 74 years and has been in the hall several times, .", + "length": 69 + }, + { + "text": "move into the newly renovated Apartment 1A inside the palace itself, .", + "length": 70 + }, + { + "text": "that the 10 bedroom residence was being lined up by the monarch as a .", + "length": 70 + }, + { + "text": "Sandringham estate office on behalf of the Queen on December 10 last .", + "length": 70 + }, + { + "text": "dominoes teams and is the headquarters of the Anmer Mere Yacht Club, .", + "length": 70 + }, + { + "text": "the hall is about 100 yards down a tiny lane which is also used as a .", + "length": 70 + }, + { + "text": "the sheep and get a general feeling of the countryside and rural life.", + "length": 70 + }, + { + "text": "2017, which means William and Kate’s son or daughter would be three .", + "length": 71 + }, + { + "text": "it would be nice if they would participate in some of the things that .", + "length": 71 + }, + { + "text": "shop and had a population of just 63 in the 2001 census, but it has a .", + "length": 71 + }, + { + "text": "country home for Kate and William, who are expecting their first child.", + "length": 71 + }, + { + "text": "which was Princess Margaret’s home, while Prince Harry will have the .", + "length": 72 + }, + { + "text": "family relaxing there – making Anmer Hall even more of an attractive .", + "length": 72 + }, + { + "text": "London, which has just one bedroom and has been described as ‘cosy’.", + "length": 72 + }, + { + "text": 
"said: \"I can't think that it would be anything other than good for the .", + "length": 72 + }, + { + "text": "live at Nottingham Cottage, in the grounds of Kensington Palace in West .", + "length": 73 + }, + { + "text": "the public, the paparazzi are no longer allowed to photograph the Royal .", + "length": 73 + }, + { + "text": "hall is the park where the child, or the children, will see the cattle, .", + "length": 73 + }, + { + "text": "has to be vetted by the police first and they can only view them at the .", + "length": 73 + }, + { + "text": "Country pile: Anmer Hall is a late Georgian country house which sits in .", + "length": 73 + }, + { + "text": "the grounds of the Sandringham estate and is just two miles east of the .", + "length": 73 + }, + { + "text": "will only heighten such speculation, particularly as attempts have been .", + "length": 73 + }, + { + "text": "to 1802 was one of the houses where Prince Charles and his then mistress .", + "length": 74 + }, + { + "text": "Anmer have been informed of the application, but no objections have been .", + "length": 74 + }, + { + "text": "who owns a kitchen and timber company, and has leased the property until .", + "length": 74 + }, + { + "text": "properties owned by the Queen on her 20,000 acre estate in north Norfolk .", + "length": 74 + }, + { + "text": "West Norfolk Borough Council said: 'A full planning application has been .", + "length": 74 + }, + { + "text": "public road to St Mary the Virgin Church, where services are held twice a .", + "length": 75 + }, + { + "text": "Picturesque: An aerial view of the country house owned by the Queen in Norfolk .", + "length": 80 + }, + { + "text": "'I would make them very welcome and I'm sure other people in the village would as well.", + "length": 87 + }, + { + "text": "Sprawling estate: The royal couple are expected to be given the country house by the Queen .", + "length": 92 + }, + { + "text": "It was earlier leased to the Duke and Duchess of Kent as their 
country house from 1972 until 1990.", + "length": 98 + }, + { + "text": "A planning application has been submitted to the council to make changes to the Georgian property .", + "length": 99 + }, + { + "text": "The village of Anmer lies between King's Lynn and Hunstanton, on the edge of the Sandringham estate .", + "length": 101 + }, + { + "text": "Charming: The Duke spent much of his childhood holidays exploring the Grade II listed house, pictured .", + "length": 103 + }, + { + "text": "The Queen and the Duke of Edinburgh stay at Sandringham every Christmas with the rest of the Royal family .", + "length": 107 + }, + { + "text": "'The drawings and detail of the plans are not being revealed on the council’s website for security reasons.", + "length": 109 + }, + { + "text": "Anmar Hall was lived in by the Duke and Duchess of Kent from 1972 to 1990 and is now leased to James Everett .", + "length": 110 + }, + { + "text": "The new apartment will suit the couple’s growing family, as it boasts five bedrooms and three reception rooms.", + "length": 112 + }, + { + "text": "William spent many childhood holidays playing in the grounds with the van Cutsem boys, who are all close friends.", + "length": 113 + }, + { + "text": "The Sandringham Estate has been owned by the Queen since her accession in 1952 and by the Royal Family since 1862.", + "length": 114 + }, + { + "text": "It is believed that the couple want to change the driveway to give them more privacy and improve access to the hall.", + "length": 116 + }, + { + "text": "Couple: The Duke and Duchess of Cambridge pictured as the Duchess left hospital after a severe bout of morning sickness.", + "length": 120 + }, + { + "text": "Drawings and detail of the plans are not being revealed on the council’s website for security reasons in the normal way.", + "length": 122 + }, + { + "text": "'The application has had its normal consultation period of 21 days and it will probably be dealt with under delegated powers.", + "length": 
125 + }, + { + "text": "It is surrounded by estate farmland, giving William and Kate a high level of privacy as they gear up to start their own family.", + "length": 127 + }, + { + "text": "' Karen Melhado, who moved to the village from York in 2007, described the prospect of Royal neighbours as 'absolutely wonderful'.", + "length": 130 + }, + { + "text": "Details of picturesque Anmar Hall (pictured) but anyone who wants to see the plans submitted to the council has to be vetted by the police .", + "length": 140 + }, + { + "text": "Royal officials have submitted a secret planning application for substantial changes at the Norfolk mansion the Queen is reportedly planning to gift to the Duke and Duchess of Cambridge.", + "length": 186 + }, + { + "text": "King’s Lynn and West Norfolk Borough Council is being asked to approve plans to re-route the existing driveway in order to make it more private as well as construct a new garden room at Grade II listed Anmer Hall on her private Sandringham Estate.", + "length": 249 + }, + { + "text": "The application, which is described as a ‘major development’ in council papers, also involves converting a wood store into accommodation – which could be used for police guarding the couple - and extending an existing garage block to form an equipment room.", + "length": 263 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.47145022451877594 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:47.551273085Z", + "first_section_created": "2025-12-23T09:32:47.553018356Z", + "last_section_published": "2025-12-23T09:32:47.553393071Z", + "all_results_received": "2025-12-23T09:32:47.638683328Z", + "output_generated": "2025-12-23T09:32:47.638895737Z", + "total_processing_time_ms": 87, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 85, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:47.553018356Z", + 
"publish_time": "2025-12-23T09:32:47.553253665Z", + "first_worker_start": "2025-12-23T09:32:47.55387309Z", + "last_worker_end": "2025-12-23T09:32:47.632522Z", + "total_journey_time_ms": 79, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:47.553839289Z", + "start_time": "2025-12-23T09:32:47.553911792Z", + "end_time": "2025-12-23T09:32:47.554017496Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:47.554076Z", + "start_time": "2025-12-23T09:32:47.554216Z", + "end_time": "2025-12-23T09:32:47.632522Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 78 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:47.553802187Z", + "start_time": "2025-12-23T09:32:47.55387309Z", + "end_time": "2025-12-23T09:32:47.553988595Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:47.553778486Z", + "start_time": "2025-12-23T09:32:47.55387939Z", + "end_time": "2025-12-23T09:32:47.553987395Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:47.553299267Z", + "publish_time": "2025-12-23T09:32:47.553393071Z", + "first_worker_start": "2025-12-23T09:32:47.553941593Z", + "last_worker_end": "2025-12-23T09:32:47.637456Z", + "total_journey_time_ms": 84, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:47.553900991Z", + "start_time": "2025-12-23T09:32:47.553941893Z", + "end_time": "2025-12-23T09:32:47.553991595Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:47.554089Z", + "start_time": "2025-12-23T09:32:47.554246Z", + "end_time": "2025-12-23T09:32:47.637456Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 83 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:47.553933093Z", + "start_time": "2025-12-23T09:32:47.553968294Z", + "end_time": "2025-12-23T09:32:47.554084299Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:47.553898091Z", + "start_time": "2025-12-23T09:32:47.553941593Z", + "end_time": "2025-12-23T09:32:47.553964094Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 161, + "min_processing_ms": 78, + "max_processing_ms": 83, + "avg_processing_ms": 80, + "median_processing_ms": 83, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3645, + "slowest_section_id": 1, + "slowest_section_time_ms": 84 + } +} diff --git a/data/output/00189f37b1c8bdc2b132b40270bb28ffcc622af1.json b/data/output/00189f37b1c8bdc2b132b40270bb28ffcc622af1.json new file mode 100644 index 0000000..776faf8 --- /dev/null +++ b/data/output/00189f37b1c8bdc2b132b40270bb28ffcc622af1.json @@ 
-0,0 +1,464 @@ +{ + "file_name": "00189f37b1c8bdc2b132b40270bb28ffcc622af1.txt", + "total_words": 1107, + "top_n_words": [ + { + "word": "the", + "count": 60 + }, + { + "word": "to", + "count": 44 + }, + { + "word": "in", + "count": 30 + }, + { + "word": "a", + "count": 22 + }, + { + "word": "and", + "count": 22 + }, + { + "word": "isis", + "count": 22 + }, + { + "word": "s", + "count": 17 + }, + { + "word": "syria", + "count": 16 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "on", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "when, not if.", + "length": 13 + }, + { + "text": "ISIS oil money .", + "length": 16 + }, + { + "text": "1) Who killed James Foley?", + "length": 26 + }, + { + "text": "\" ISIS threat to the West .", + "length": 27 + }, + { + "text": "4) Will ISIS attack the West?", + "length": 29 + }, + { + "text": "5) Can the ISIS money flow be stopped?", + "length": 38 + }, + { + "text": "Here are key questions on the matter: .", + "length": 39 + }, + { + "text": "\"It's expressed the intent,\" Hayden said.", + "length": 41 + }, + { + "text": "Is Obama heading toward airstrikes in Syria?", + "length": 44 + }, + { + "text": "Lindsey Graham of South Carolina said Monday.", + "length": 45 + }, + { + "text": "\" Graham, a consistent advocate for increased U.", + "length": 48 + }, + { + "text": "Syria ready to cooperate with UN to fight terror .", + "length": 50 + }, + { + "text": "Republicans urge airstrikes in Syria to defeat ISIS .", + "length": 53 + }, + { + "text": "and European Union support, officials have made clear.", + "length": 54 + }, + { + "text": "-backed opposition is offering to help him take on ISIS.", + "length": 56 + }, + { + "text": "3) Will the Syrian regime that Obama opposes help 
fight ISIS?", + "length": 61 + }, + { + "text": "\"But it's one that has global ambitions -- and it has the tools.", + "length": 64 + }, + { + "text": "Britain close to identifying James Foley's killer, ambassador says .", + "length": 68 + }, + { + "text": "2) Will the United States expand air strikes to ISIS targets in Syria?", + "length": 70 + }, + { + "text": "Last week, Defense Secretary Chuck Hagel and Joint Chiefs Chairman Gen.", + "length": 71 + }, + { + "text": "Linguists said that based on his voice, the man sounds to be younger than 30.", + "length": 77 + }, + { + "text": "To some in the United States, especially critics of Obama, an ISIS attack on U.", + "length": 79 + }, + { + "text": "\" Last week, Atlantic Council senior fellow and Syria expert Frederic Hof said a U.", + "length": 83 + }, + { + "text": "It also has grabbed millions robbing banks including an Iraqi central bank in Mosul.", + "length": 84 + }, + { + "text": "Intervening in Syria also could result in some strange geopolitical bedfellows, he noted.", + "length": 89 + }, + { + "text": "The President also would seek to forge a coalition including regional allies as well as U.", + "length": 90 + }, + { + "text": "Obama wants al-Assad out of power, but now the Syrian leader engaged in a civil war against a U.", + "length": 96 + }, + { + "text": "Martin Dempsey said that taking on ISIS in Syria was the only way to defeat the Sunni jihadists.", + "length": 96 + }, + { + "text": "\"So if you intervene, you may be helping Iran and Hezbollah and (Syrian President Bashar al-Assad's) regime.", + "length": 108 + }, + { + "text": "He also appears to have been educated in England from a young age and to be from southern England or London.", + "length": 108 + }, + { + "text": "Now the focus in America and abroad has become what will President Barack Obama and other leaders do about it?", + "length": 110 + }, + { + "text": "\"The sort of legal barrier that prohibited doing something inside Syria now 
seems to have evaporated,\" Hof said.", + "length": 112 + }, + { + "text": "A White House spokesman said last week that Obama would consult with Congress before taking such a step in Syria.", + "length": 113 + }, + { + "text": "Officials say the group can get about $3 million a day by selling discounted oil from fields it has seized in Iraq.", + "length": 115 + }, + { + "text": "He couldn't elaborate on the identity of the killer, who is seen decapitating Foley in a video posted last week on YouTube.", + "length": 123 + }, + { + "text": "The Syrian offer to help fight ISIS comes after al-Assad's government enabled the group to expand amid the Syrian civil war.", + "length": 124 + }, + { + "text": "ISIS fighters have attacked the Syrian opposition fighting government forces, but also have seized some government territory.", + "length": 125 + }, + { + "text": "Pressure is increasing on Obama to go after ISIS in both Iraq and Syria, ignoring an essentially non-existent border between them.", + "length": 130 + }, + { + "text": "\"We're putting a great deal into the search,\" he said, referring to the use of sophisticated technology to analyze the man's voice.", + "length": 131 + }, + { + "text": "\"The White House is trying to minimize the threat we face in order to justify not changing a failed strategy,\" conservative GOP Sen.", + "length": 132 + }, + { + "text": "Foreign Minister Walid Moallem said Monday his government would accept support from the United States and others working under the U.", + "length": 133 + }, + { + "text": "\"There's no more powerful way to express their street credentials among the jihadist community than a successful attack against the West.", + "length": 137 + }, + { + "text": "military might, told CNN on Sunday that \"it's about time now to assume the worst about these guys, rather than to be underestimating them.", + "length": 138 + }, + { + "text": "In the video, Foley, 40, is seen kneeling next to a man dressed in black, who speaks 
with what experts say is a distinctly English accent.", + "length": 138 + }, + { + "text": "CNN National Security Analyst Peter Bergen said it will be difficult to defeat ISIS without ground forces, something Obama clearly opposes.", + "length": 139 + }, + { + "text": "Bank robbery, kidnapping, smuggling, selling oil on the black market -- ISIS gets money to fund and expand its organization in all kinds of ways.", + "length": 145 + }, + { + "text": "\"Two of the most effective fighting forces in Syria are al Qaeda or al Qaeda splinter groups, or groups like Hezbollah, backed by Iran,\" Bergen said.", + "length": 149 + }, + { + "text": "Obama \"has not made any decision to order military action in Syria,\" White House spokesman Josh Earnest said Monday, but the speculation and insistence continued.", + "length": 162 + }, + { + "text": "\" There's no clear consensus inside the intelligence community as to whether ISIS, which calls itself the Islamic State, is currently capable of striking the West.", + "length": 163 + }, + { + "text": "\"ISIS is a very powerful local organization, and probably a reasonably powerful regional terrorist organization,\" former CIA chief Michael Hayden told CNN on Sunday.", + "length": 165 + }, + { + "text": "\" Obama already sent military advisers to Iraq and launched air strikes to protect them and minority groups from ISIS fighters rampaging through the country's north.", + "length": 165 + }, + { + "text": "ISIS, as the Islamic State jihadists in Iraq and Syria are known, has become the new face of international terrorism in the eyes of the United States and its Western allies.", + "length": 173 + }, + { + "text": "Western allies can reduce the group's income by refusing to pay ransom for abducted citizens and pressuring regional governments to crack down on wealthy citizens sending money to it.", + "length": 183 + }, + { + "text": "umbrella to fight \"terrorists\" -- a code word for the group that calls itself the Islamic State and 
seeks to establish a caliphate across a Sunni-dominated swath of the the Middle East.", + "length": 185 + }, + { + "text": "For Obama, the step would reverse his refusal for three years to get involved militarily in Syria despite pressure from his own advisers, including former Secretary of State Hillary Clinton.", + "length": 190 + }, + { + "text": "The United States is working with governments in the region, including Kuwait, Qatar and Saudi Arabia, to stop such private donations, State Department spokeswoman Marie Harf said last week.", + "length": 190 + }, + { + "text": "Al-Assad's military recently launched its own air strikes on ISIS positions, amounting to what Hof described as a dispute between crime gangs over money -- in this case, from oil fields occupied by ISIS.", + "length": 203 + }, + { + "text": "Moallem, however, warned against any unilateral action or strikes in Syrian territory without its permission, saying \"any effort to fight terrorism should be done in coordination\" with the \"Syrian government.", + "length": 208 + }, + { + "text": "Britain's ambassador to the United States, Peter Westmacott, told CNN on Sunday that British officials were close to identifying the ISIS militant who beheaded Foley, an American journalist captured in Syria in 2012.", + "length": 216 + }, + { + "text": "rescue mission for Foley earlier this year that went into Syria but failed to find him established the precedent for military action across the Iraq border, superseding any legal considerations such as being asked by the host government to enter.", + "length": 246 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5991850793361664 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:48.054207874Z", + "first_section_created": "2025-12-23T09:32:48.05631116Z", + "last_section_published": "2025-12-23T09:32:48.056602071Z", + "all_results_received": "2025-12-23T09:32:48.164791757Z", + "output_generated": 
"2025-12-23T09:32:48.165022667Z", + "total_processing_time_ms": 110, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 108, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:48.05631116Z", + "publish_time": "2025-12-23T09:32:48.056524868Z", + "first_worker_start": "2025-12-23T09:32:48.057190995Z", + "last_worker_end": "2025-12-23T09:32:48.12507Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:48.057136493Z", + "start_time": "2025-12-23T09:32:48.057211796Z", + "end_time": "2025-12-23T09:32:48.0572968Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:48.057409Z", + "start_time": "2025-12-23T09:32:48.057552Z", + "end_time": "2025-12-23T09:32:48.12507Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:48.057214096Z", + "start_time": "2025-12-23T09:32:48.0573156Z", + "end_time": "2025-12-23T09:32:48.057583011Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:48.057134893Z", + "start_time": "2025-12-23T09:32:48.057190995Z", + "end_time": "2025-12-23T09:32:48.0572971Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:48.056545969Z", + "publish_time": "2025-12-23T09:32:48.056602071Z", + "first_worker_start": "2025-12-23T09:32:48.057164594Z", + "last_worker_end": "2025-12-23T09:32:48.163837Z", + "total_journey_time_ms": 107, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:48.057187795Z", + "start_time": "2025-12-23T09:32:48.057230397Z", + "end_time": "2025-12-23T09:32:48.057285899Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:48.057409Z", + "start_time": "2025-12-23T09:32:48.057535Z", + "end_time": "2025-12-23T09:32:48.163837Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 106 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:48.057180995Z", + "start_time": "2025-12-23T09:32:48.057226397Z", + "end_time": "2025-12-23T09:32:48.057271098Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:48.057112692Z", + "start_time": "2025-12-23T09:32:48.057164594Z", + "end_time": "2025-12-23T09:32:48.057181695Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 173, + "min_processing_ms": 67, + "max_processing_ms": 106, + "avg_processing_ms": 86, + "median_processing_ms": 106, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3290, + "slowest_section_id": 1, + 
"slowest_section_time_ms": 107 + } +} diff --git a/data/output/0018b611996c4ea42c7f4b6d9a2f7379855aa4a2.json b/data/output/0018b611996c4ea42c7f4b6d9a2f7379855aa4a2.json new file mode 100644 index 0000000..5adf801 --- /dev/null +++ b/data/output/0018b611996c4ea42c7f4b6d9a2f7379855aa4a2.json @@ -0,0 +1,242 @@ +{ + "file_name": "0018b611996c4ea42c7f4b6d9a2f7379855aa4a2.txt", + "total_words": 429, + "top_n_words": [ + { + "word": "the", + "count": 17 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "3d", + "count": 7 + }, + { + "word": "could", + "count": 6 + }, + { + "word": "in", + "count": 6 + }, + { + "word": "it", + "count": 6 + }, + { + "word": "on", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "com about the potential of 3D printing.", + "length": 39 + }, + { + "text": "'There’s been a lot of buzz recently.", + "length": 39 + }, + { + "text": "What if we had a digital catalogue of spare parts for items that you’d bought?", + "length": 80 + }, + { + "text": "The blueprints for the gun were then posted on a website and downloaded over 100,000 times.", + "length": 91 + }, + { + "text": "'How about letting kids design their own toys and then actually being able to get them made?", + "length": 92 + }, + { + "text": "'They could be printed on demand and ready for you by the time you’d finished your shopping.", + "length": 94 + }, + { + "text": "Ideas include digitally making clothing, furniture, personal gifts and even food in their shops.", + "length": 96 + }, + { + "text": "He said: '3D printing] could revolutionise the way we view stores and what we can get from them.", + "length": 96 + }, + { + "text": "3D printing is burgeoning industry with all manner of objects, such as this gun, now being created.", + "length": 99 + }, + { + "text": "The supermarket giant is keen to use new technologies to offer a wider range of products 
to consumers.", + "length": 102 + }, + { + "text": "The high street retail giant is working on developing new technology for a variety of products in its stores.", + "length": 109 + }, + { + "text": "You could even take a broken item into store; we could scan it in 3D, repair it digitally and make you a new one.", + "length": 113 + }, + { + "text": "The Mail on Sunday also printed their own gun and demonstrated how easy it was to get it through immigration and security.", + "length": 122 + }, + { + "text": "Company researchers believe 3D printers are a natural progression given that they already offer photo and poster printing .", + "length": 123 + }, + { + "text": "Paul Wilkinson, a lead research specialist with Tesco, revealed the retail giant’s ambitions writing a blog post on tesco.", + "length": 124 + }, + { + "text": "It’s great that we have one of the machines to test out and to be able to look at how they might change the way stores work.", + "length": 126 + }, + { + "text": "The project could also see Tesco stores repair broken items or print spare parts for a product that has already been purchased.", + "length": 127 + }, + { + "text": "' This is not the first time 3D technology has attracted headlines after it was revealed recently a printed gun had been digitally made.", + "length": 136 + }, + { + "text": "Customers could soon design their own items, go into a supermarket and have them printed in 3D, if an ambitious major project by Tesco succeeds.", + "length": 144 + }, + { + "text": "' He said the Tesco team would be meeting “some of the big names and getting together with lots of start-ups and trying to find that idea or product that might just change the retail world.", + "length": 191 + }, + { + "text": "' Wilkinson, who is heading on a fact-finding mission to San Francisco with a team from Tesco, added in The Grocer magazine: 'We already print photos and posters in many of our larger stores, so why not other gifts and personalised items?", + 
"length": 238 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.5380838513374329 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:48.557341472Z", + "first_section_created": "2025-12-23T09:32:48.557640584Z", + "last_section_published": "2025-12-23T09:32:48.557803491Z", + "all_results_received": "2025-12-23T09:32:48.616121255Z", + "output_generated": "2025-12-23T09:32:48.616281161Z", + "total_processing_time_ms": 58, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:48.557640584Z", + "publish_time": "2025-12-23T09:32:48.557803491Z", + "first_worker_start": "2025-12-23T09:32:48.558400515Z", + "last_worker_end": "2025-12-23T09:32:48.615164Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:48.558334612Z", + "start_time": "2025-12-23T09:32:48.558405815Z", + "end_time": "2025-12-23T09:32:48.558456217Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:48.558651Z", + "start_time": "2025-12-23T09:32:48.558787Z", + "end_time": "2025-12-23T09:32:48.615164Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:48.558335112Z", + "start_time": "2025-12-23T09:32:48.558405815Z", + "end_time": "2025-12-23T09:32:48.558461117Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:48.55827721Z", + "start_time": "2025-12-23T09:32:48.558400515Z", + "end_time": "2025-12-23T09:32:48.558425616Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + 
"total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2398, + "slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/001953f437e71dedb22650cb78e0d6c429a1503c.json b/data/output/001953f437e71dedb22650cb78e0d6c429a1503c.json new file mode 100644 index 0000000..8535671 --- /dev/null +++ b/data/output/001953f437e71dedb22650cb78e0d6c429a1503c.json @@ -0,0 +1,254 @@ +{ + "file_name": "001953f437e71dedb22650cb78e0d6c429a1503c.txt", + "total_words": 538, + "top_n_words": [ + { + "word": "the", + "count": 50 + }, + { + "word": "of", + "count": 25 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "university", + "count": 17 + }, + { + "word": "top", + "count": 11 + }, + { + "word": "are", + "count": 10 + }, + { + "word": "universities", + "count": 10 + }, + { + "word": "hong", + "count": 8 + }, + { + "word": "is", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By 
.", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Amanda Williams .", + "length": 17 + }, + { + "text": "07:37 EST, 11 June 2013 .", + "length": 25 + }, + { + "text": "09:28 EST, 11 June 2013 .", + "length": 25 + }, + { + "text": "Warwick, in third place, breaks up the Asian domination.", + "length": 56 + }, + { + "text": "' Europe still holds its own in the list, with 24 of the 50 but there are just four from North America.", + "length": 103 + }, + { + "text": "Britain’s ‘young’ universities are now rated as among the best in the world by international analysts.", + "length": 108 + }, + { + "text": "Australia, with nine, is the only single country with more establishments in the 50 than Britain’s impressive standing.", + "length": 121 + }, + { + "text": "Lancaster University is tenth in the list and only one American centre, The University of California, Irvine (UCI) makes the top 20.", + "length": 132 + }, + { + "text": "Two more technology centres in Korea and one in Japan are also in the top ten, showing where the balance of power is heading, said QS experts.", + "length": 142 + }, + { + "text": "Asian universities dominate the top of the list, making up five of the top six places and an indication of how the balance is changing across the globe.", + "length": 152 + }, + { + "text": "Top of the overall list is the Hong Kong University of Science \u0026 Technology followed by Singapore’s Nanyang Technological University with Warwick in third.", + "length": 157 + }, + { + "text": "And it shows that it is not just the older seats of learning such as Cambridge, Oxford, Durham and London that the rest of the world looks at with admiration.", + "length": 158 + }, + { + "text": "The seven UK institutions are in the global ‘50 under 50’ - a list of the top universities that have only been going since 1963 across the whole of the planet.", + "length": 163 + }, + { + "text": "Warwick 
along with Lancaster, make the top ten while five others, Brunel, Bath, Aston, Essex and Loughborough are in the places below in the QS University Rankings .", + "length": 165 + }, + { + "text": "And new universities also hold their own against their more established rivals as all 50 institutions are also in the global top 350 of all universities compiled by QS.", + "length": 168 + }, + { + "text": "Two British universities, Warwick and Lancaster, make the top ten while five others, Brunel, Bath, Aston, Essex and Loughborough are in the places below in the QS University Rankings.", + "length": 183 + }, + { + "text": "Warwick University is one of seven British universities which are in the global ¿50 under 50¿ - a list of the top institutions that have only been going since 1963 across the whole of the planet .", + "length": 198 + }, + { + "text": "Ben Sowter, head of the QS Intelligence Unit, said: 'Asian higher education is undergoing a rapid transformation, and Singapore, Hong Kong, China and Korea are at the forefront of the assault on the global academic elite.", + "length": 221 + }, + { + "text": "'There are already 17 per cent more Asian universities in the Top 200 of the QS World University Rankings since the recession, and the next two decades could see leading US and European universities objectively overtaken.", + "length": 221 + }, + { + "text": "1 The Hong Kong University of Science and Technology (Hong Kong - 1991)2 Nanyang Technological University (Singapore - 1991)3 The University of Warwick (GB - 1964) 4 Korea Advanced Institute of Science and Technology (Korea - 1971) 5 City University of Hong Kong (Hong Kong - 1984) 6 Pohang University of Science and Technology (Korea - 1986) 7 Maastricht University (Netherlands - 1976)8 University of California, Irvine (USA - 1965) 9 The Hong Kong Polytechnic University (Hong Kong - 1994) 10 Lancaster University (GB - 1964) International employment and education consultants QS, analysed data for the ‘young’ 
universities including their academic record and their reputation with firms and students.", + "length": 708 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5850337147712708 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:49.058556492Z", + "first_section_created": "2025-12-23T09:32:49.065274264Z", + "last_section_published": "2025-12-23T09:32:49.065516374Z", + "all_results_received": "2025-12-23T09:32:49.131750459Z", + "output_generated": "2025-12-23T09:32:49.131905965Z", + "total_processing_time_ms": 73, + "file_splitting_time_ms": 6, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:49.065274264Z", + "publish_time": "2025-12-23T09:32:49.065516374Z", + "first_worker_start": "2025-12-23T09:32:49.066026495Z", + "last_worker_end": "2025-12-23T09:32:49.130866Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:49.066042895Z", + "start_time": "2025-12-23T09:32:49.066130199Z", + "end_time": "2025-12-23T09:32:49.066193701Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:49.066336Z", + "start_time": "2025-12-23T09:32:49.0665Z", + "end_time": "2025-12-23T09:32:49.130866Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:49.065986893Z", + "start_time": "2025-12-23T09:32:49.066060996Z", + "end_time": "2025-12-23T09:32:49.066128899Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:49.065951992Z", + "start_time": "2025-12-23T09:32:49.066026495Z", + "end_time": "2025-12-23T09:32:49.066078197Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + 
} + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3274, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/00197f390e3dd2ce02eabb5fd44bb40b89b64389.json b/data/output/00197f390e3dd2ce02eabb5fd44bb40b89b64389.json new file mode 100644 index 0000000..8337587 --- /dev/null +++ b/data/output/00197f390e3dd2ce02eabb5fd44bb40b89b64389.json @@ -0,0 +1,446 @@ +{ + "file_name": "00197f390e3dd2ce02eabb5fd44bb40b89b64389.txt", + "total_words": 814, + "top_n_words": [ + { + "word": "the", + "count": 40 + }, + { + "word": "a", + "count": 30 + }, + { + "word": "her", + "count": 27 + }, + { + "word": "she", + "count": 23 + }, + { + "word": "and", + "count": 22 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "on", + "count": 12 + }, + { + "word": "police", + "count": 12 + }, + { + "word": "spies", + "count": 12 + } + ], + 
"sorted_sentences": [ + { + "text": "m.", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "It .", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "20 p.", + "length": 5 + }, + { + "text": "'She .", + "length": 6 + }, + { + "text": "at 11.", + "length": 6 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "black shawl.", + "length": 12 + }, + { + "text": "Lydia Warren .", + "length": 14 + }, + { + "text": "What a bunch of BS.", + "length": 19 + }, + { + "text": "See below for video .", + "length": 21 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "'I answered that I was .", + "length": 24 + }, + { + "text": "challenged the decision.", + "length": 24 + }, + { + "text": "08:25 EST, 14 August 2012 .", + "length": 27 + }, + { + "text": "08:53 EST, 14 August 2012 .", + "length": 27 + }, + { + "text": "approaching my seventh year.", + "length": 28 + }, + { + "text": "on Sunday, authorities said.", + "length": 28 + }, + { + "text": "When she submitted a second .", + "length": 29 + }, + { + "text": "'People can take it however they .", + "length": 34 + }, + { + "text": "After a struggle, she was arrested.", + "length": 35 + }, + { + "text": "She then said, \"Well, you suck at it\".", + "length": 38 + }, + { + "text": "'I honestly think it describes who I .", + "length": 38 + }, + { + "text": "Amid the furor, Sydney and her mother .", + "length": 39 + }, + { + "text": "am,' Sydney told Today host Matt Lauer.", + "length": 39 + }, + { + "text": "Her mother was charged with contributing .", + "length": 42 + }, + { + "text": "When partygoers spotted the police, they .", + "length": 42 + }, + { + "text": "She will play a character named 'Missy' in .", + "length": 44 + }, + { + "text": "the science-fiction station for a movie called .", + "length": 48 + }, + { + "text": "The younger Spies was also released, on $500 bond.", + 
"length": 50 + }, + { + "text": "hill behind the house, The Durango Herald reported.", + "length": 51 + }, + { + "text": "yearbook committee at Durango High School in January.", + "length": 53 + }, + { + "text": "American Horror House, Spies revealed earlier this year.", + "length": 56 + }, + { + "text": "So they got hold of me and my mom and flew me out to Louisiana.", + "length": 63 + }, + { + "text": "police officer and resisting arrest and released on $10,000 bond.", + "length": 65 + }, + { + "text": "is not the sort of attention Spies wished to garner after hitting .", + "length": 67 + }, + { + "text": "headlines for the provocative photograph that was rejected by the .", + "length": 67 + }, + { + "text": "Officers chased her to her bedroom where she tried to slam the door.", + "length": 68 + }, + { + "text": "want, but I'm an outgoing person, and I really do think it's artistic.", + "length": 70 + }, + { + "text": "hopped over a chain-link fence in the back garden and ran up a steep .", + "length": 70 + }, + { + "text": "to the delinquency of a minor and misdemeanor counts of obstructing a .", + "length": 71 + }, + { + "text": "photograph - of her wearing a strapless dress - the group rejected it .", + "length": 71 + }, + { + "text": "again, claiming she was showing too much skin, and she and her mother .", + "length": 71 + }, + { + "text": "protested outside the school and appeared on the Today show to discuss .", + "length": 72 + }, + { + "text": "Her daughter had allegedly tried to stop police from catching her mother.", + "length": 73 + }, + { + "text": "the photograph, which shows her wearing just a short yellow skirt and a .", + "length": 73 + }, + { + "text": "On Facebook page on Monday evening, she posted: 'I just love being targeted.", + "length": 76 + }, + { + "text": "asked how long I had been a police officer,' an officer said in the arrest report.", + "length": 82 + }, + { + "text": "Speaking out: Sydney and her mother on the Today show after the 
story broke in January.", + "length": 87 + }, + { + "text": "She then 'broke into a full sprint for her front door', according to an incident report.", + "length": 88 + }, + { + "text": "As police carried her to the car, she went limp and appeared as if she had been drinking.", + "length": 89 + }, + { + "text": "' She has now completed her senior year but decided not to attend her graduation ceremony.", + "length": 90 + }, + { + "text": "She has pleaded not guilty to all three charges and has a pre-trial conference on Tuesday.", + "length": 90 + }, + { + "text": "Police approached Miki Spies, 45, and she allegedly refused to give her name or date of birth.", + "length": 94 + }, + { + "text": "Protest: The Spies challenged the yearbook committee decision by protesting outside the school .", + "length": 96 + }, + { + "text": "Her mother allegedly tried to run away from police when they found underage drinkers in her home .", + "length": 98 + }, + { + "text": "'She nearly kicked me in the groin,' an officer wrote in his report about the teenager's struggle.", + "length": 98 + }, + { + "text": "Caught: Sydney Spies, left, shot to fame after her racy yearbook photo was rejected by her school.", + "length": 98 + }, + { + "text": "' Not shy: The exposure following the scandal led to a small movie role in a Syfy movie for Spies .", + "length": 99 + }, + { + "text": "She has now been arrested along with her mother, Denise 'Miki' Spies, right, at a party at their home .", + "length": 103 + }, + { + "text": "Uproar: Spies had submitted this photograph for the yearbook but the committee deemed it inappropriate .", + "length": 104 + }, + { + "text": "When the teen was arrested for obstructing a police officer and put in a patrol car, she attempted to escape.", + "length": 109 + }, + { + "text": "Sydney Spies told the Westwood newspaper in Denver: 'The head of the Syfy network said he wanted me to be in a movie.", + "length": 117 + }, + { + "text": "' The exposure landed the 
aspiring model and actress a role in a movie on the Syfy channel, which will air on Halloween.", + "length": 120 + }, + { + "text": "Police arrived to find numerous underage drinkers with a beer keg, liquor bottles and cups of alcohol at the home in Durango, Colorado, .", + "length": 137 + }, + { + "text": "Instead, she has been focusing on her prospective career as a model and actress although these latest photographs - mugshots - are perhaps not what she had in mind.", + "length": 164 + }, + { + "text": "Sydney Spies, 18, allegedly tried to stop police arresting her mother, Denise 'Miki' Spies, who attempted to run away when authorities found teenagers drinking alcohol in her home.", + "length": 180 + }, + { + "text": "A teenager who attracted national attention when her racy high school yearbook photo was rejected for being too revealing has been arrested at a house party - along with her mother.", + "length": 181 + }, + { + "text": "As she was arrested, she began yelling that police were only called because she was a lesbian and shouted obscenities about Christians at the church's pastor, who had asked her to be quiet.", + "length": 189 + }, + { + "text": "' But it's not the first arrest for her mother, who was charged with resisting arrest, obstructing a police officer and trespassing for making noise in a church park early on June 24, records show.", + "length": 198 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6008733510971069 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:49.566303276Z", + "first_section_created": "2025-12-23T09:32:49.56663689Z", + "last_section_published": "2025-12-23T09:32:49.566939102Z", + "all_results_received": "2025-12-23T09:32:49.633789812Z", + "output_generated": "2025-12-23T09:32:49.633966319Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + 
"section_id": 0, + "creation_time": "2025-12-23T09:32:49.56663689Z", + "publish_time": "2025-12-23T09:32:49.566939102Z", + "first_worker_start": "2025-12-23T09:32:49.567447023Z", + "last_worker_end": "2025-12-23T09:32:49.632888Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:49.567401821Z", + "start_time": "2025-12-23T09:32:49.567472724Z", + "end_time": "2025-12-23T09:32:49.567566628Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:49.56768Z", + "start_time": "2025-12-23T09:32:49.567819Z", + "end_time": "2025-12-23T09:32:49.632888Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:49.56738602Z", + "start_time": "2025-12-23T09:32:49.567458323Z", + "end_time": "2025-12-23T09:32:49.567589328Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:49.56737492Z", + "start_time": "2025-12-23T09:32:49.567447023Z", + "end_time": "2025-12-23T09:32:49.567497625Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4690, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/0019a37dc79e4d4c490f7a3965fee27ea0105ff4.json b/data/output/0019a37dc79e4d4c490f7a3965fee27ea0105ff4.json new file mode 100644 index 0000000..29fa017 --- /dev/null +++ b/data/output/0019a37dc79e4d4c490f7a3965fee27ea0105ff4.json @@ -0,0 +1,330 @@ +{ + "file_name": "0019a37dc79e4d4c490f7a3965fee27ea0105ff4.txt", + "total_words": 689, + "top_n_words": [ + { + "word": "the", + "count": 41 + }, + { + "word": "and", + "count": 21 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "their", + "count": 14 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "sex", + "count": 10 + }, + { + "word": "emily", + "count": 9 + }, + { + "word": "chris", + "count": 8 + }, + { + "word": "for", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Bianca London .", + "length": 15 + }, + { + "text": "'We are so compatible.", + "length": 22 + }, + { + "text": "09:03 EST, 6 August 2013 .", + "length": 26 + }, + { + "text": "12:02 EST, 6 August 2013 .", + "length": 26 + }, + { + "text": "'It's all good fun though.", + "length": 26 + }, + { + "text": "He knows everything about me.", + "length": 29 + }, + { + "text": "We laugh together and communicate.", + "length": 34 + }, + { + "text": "It's all part of trying new things.", + "length": 35 + }, 
+ { + "text": "'Sometimes they are tricky to get right.", + "length": 40 + }, + { + "text": "'I've hit my head more times than I can remember.", + "length": 49 + }, + { + "text": "' The Wheelbarrow is shown in 3-D form on an iPad .", + "length": 51 + }, + { + "text": "But this way we know exactly how to lie or stand or kneel.", + "length": 58 + }, + { + "text": "Chris has fallen off the bed, off chairs, even out of our car.", + "length": 62 + }, + { + "text": "Historians believe Kama Sutra was written between 400 BCE and 200 CE.", + "length": 69 + }, + { + "text": "And within a couple of months the couple had found their way out of their sexual rut.", + "length": 85 + }, + { + "text": "But the couple kickstarted things in the bedroom after downloading the Xcite Books' 3D sex app.", + "length": 95 + }, + { + "text": "'We've tried all sorts together - ropes, handcuffs, toys and worked our way through the Kama Sutra.", + "length": 99 + }, + { + "text": "The mother-of-four Emily and boyfriend Chris take their iPad to bed with them for lovemaking sessions .", + "length": 103 + }, + { + "text": "' The couple say they are extremely compatible and the app has helped them get out of their sexual rut .", + "length": 104 + }, + { + "text": "The pair bought a copy of ancient Hindu sex bible, the Kama Sutra, to add a bit of spice to the bedroom.", + "length": 104 + }, + { + "text": "A mother has told how her sex life has been boosted after downloading a Kama Sutra app from the internet.", + "length": 105 + }, + { + "text": "Emily and Chris now describe themselves as being 'sex-obsessed' and sleep together every night of the week.", + "length": 107 + }, + { + "text": "The Karma Xcitra has interactive images of 69 positions and even allows users to add a customised soundtrack.", + "length": 109 + }, + { + "text": "The Kama Sutra is an ancient Indian Hindu book and part of it outlines practical advice on sexual intercourse.", + "length": 110 + }, + { + "text": "'Kāma' means 
sensual or sexual pleasure, and 'sūtra' literally means a thread or line that holds things together.", + "length": 115 + }, + { + "text": "The interactive programme added an extra 69 positions to the original found in the book, taking their tally to 444.", + "length": 115 + }, + { + "text": "'We’re using mobile technology to make the classic sex guide more relevant and easier to use for today’s readers.", + "length": 117 + }, + { + "text": "The pair have used the app and now mastered moves including the Wheelbarrow, the Catherine Wheel and the Prone Tiger .", + "length": 118 + }, + { + "text": "Emily said: 'It's really easy to try new moves - you can spin the screen around for a 360 degree view of each position.", + "length": 119 + }, + { + "text": "Whilst it mainly describes being sensuous, Kamasutra also ascribes to religious faith and tradition of the Hindu system.", + "length": 120 + }, + { + "text": "' Emily has dated Chris, who works as a career, since 2011 but after two years of passion their sex-life started to stall.", + "length": 122 + }, + { + "text": "Emily Hiley, 25, claims she has tried out 400 different positions with her boyfriend after getting the interactive 3D sex guide.", + "length": 128 + }, + { + "text": "Stay-at-home mother Emily said: 'I'd never had that sexual spark with someone before I met Chris - it was like he lit my desire fire.", + "length": 133 + }, + { + "text": "Emily has dated Chris since 2011 but after two years of passion their sex-life started to stall so they looked for ways to spice it up .", + "length": 136 + }, + { + "text": "A spokesman for Xcite Books, said: 'Karma Xcitra uses state-of-the-art technology to help couples explore interesting sexual positions whilst having fun in the process.", + "length": 168 + }, + { + "text": "Emily Hiley and her boyfriend Chris Eltringham have worked their way through more than 400 sexual positions after taking their iPad to bed and using the Kama Sutra 3D app, Karma Xcitra .", + 
"length": 186 + }, + { + "text": "The pair bought a copy of ancient Hindu sex bible, the Kama Sutra, to add a bit of spice to the bedroom but had worked their way through it in three months so looked for another option .", + "length": 186 + }, + { + "text": "After just three months they had worked their way through the entire book of 365 positions at their home in Erdington, Birmingham, so they started looking for another aid to satisfy their sexual appetite and found the app online.", + "length": 229 + }, + { + "text": "Mother-of-four Emily and boyfriend Chris Eltringham, 21, now take their iPad to bed with them for lovemaking sessions and the pair have mastered several complex sex moves including the Wheelbarrow, the Catherine Wheel and the Prone Tiger.", + "length": 238 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.6020803451538086 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:50.068046618Z", + "first_section_created": "2025-12-23T09:32:50.068442034Z", + "last_section_published": "2025-12-23T09:32:50.068752846Z", + "all_results_received": "2025-12-23T09:32:50.134896428Z", + "output_generated": "2025-12-23T09:32:50.135047034Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:50.068442034Z", + "publish_time": "2025-12-23T09:32:50.068752846Z", + "first_worker_start": "2025-12-23T09:32:50.069106361Z", + "last_worker_end": "2025-12-23T09:32:50.132423Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:50.069144862Z", + "start_time": "2025-12-23T09:32:50.069219865Z", + "end_time": "2025-12-23T09:32:50.069306169Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": 
"2025-12-23T09:32:50.069353Z", + "start_time": "2025-12-23T09:32:50.069511Z", + "end_time": "2025-12-23T09:32:50.132423Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:50.06910226Z", + "start_time": "2025-12-23T09:32:50.069166163Z", + "end_time": "2025-12-23T09:32:50.069271267Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:50.069050658Z", + "start_time": "2025-12-23T09:32:50.069106361Z", + "end_time": "2025-12-23T09:32:50.069168363Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3861, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/0019f3f79be4536756ac180d154bf67294d0c9b6.json 
b/data/output/0019f3f79be4536756ac180d154bf67294d0c9b6.json new file mode 100644 index 0000000..a1b2884 --- /dev/null +++ b/data/output/0019f3f79be4536756ac180d154bf67294d0c9b6.json @@ -0,0 +1,282 @@ +{ + "file_name": "0019f3f79be4536756ac180d154bf67294d0c9b6.txt", + "total_words": 563, + "top_n_words": [ + { + "word": "the", + "count": 28 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "he", + "count": 12 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "his", + "count": 11 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "bevan", + "count": 10 + }, + { + "word": "had", + "count": 10 + }, + { + "word": "in", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "He makes my skin crawl.", + "length": 23 + }, + { + "text": "He makes me feel so sick.", + "length": 25 + }, + { + "text": "12:04 EST, 12 March 2013 .", + "length": 26 + }, + { + "text": "12:25 EST, 12 March 2013 .", + "length": 26 + }, + { + "text": "'He is extremely ashamed and embarrassed.", + "length": 41 + }, + { + "text": "Ms Qureshi said his victims viewed the footage.", + "length": 47 + }, + { + "text": "' Another stated: 'I'm upset someone I trusted and worked with could do this.", + "length": 77 + }, + { + "text": "'The defendant snatched it from her and went to the rear fire exit,' said Ms Qureshi.", + "length": 85 + }, + { + "text": "'It was your own work colleagues that you were abusing in this organised and systematic way.", + "length": 92 + }, + { + "text": "'He was pressing buttons on the mobile phone and said he was deleting everything on the phone.", + "length": 94 + }, + { + "text": "Captured: Bevan had worked at Three Sisters and Bronte View residential home, in Haworth, West 
Yorks.", + "length": 101 + }, + { + "text": "She said: 'They were sickened and disgusted by what they had been shown and felt their privacy had been violated.", + "length": 113 + }, + { + "text": "' Sentencing Judge John Potter told Bevan: 'The purpose of these offences was to fulfill your own perverted sexual gratification.", + "length": 129 + }, + { + "text": "'This was in any view an insidious offence which must have caused distress to your victims when they were told what had occurred.", + "length": 129 + }, + { + "text": "' Bevan apologised to the woman who'd found it, but the police were informed and he was arrested and admitted it was a 'silly thing to do.", + "length": 138 + }, + { + "text": ", since 2007 where he secretly rigged up his Sony Ericsson mobile in the communal toilet and successfully recorded a number of colleagues .", + "length": 139 + }, + { + "text": "A voyeuristic support worker at a care home filmed female colleagues using the toilet for his own 'perverted sexual gratification', a court heard.", + "length": 146 + }, + { + "text": "' Police seized a Dell computer from his home and discovered seven videos had been downloaded featuring three of his female colleagues, and a male using the toilet.", + "length": 164 + }, + { + "text": "' Mitigating Arshad Mahmood said Bevan had not shown the videos to anybody else and he had initially set up the phone to record how many people were using the toilet.", + "length": 166 + }, + { + "text": "' The judge also imposed a sexual offences prevention order banning Bevan working further in residential care homes, entering his former employment or contacting his victims.", + "length": 174 + }, + { + "text": "Prosecutor Shamalia Qureshi said the phone was discovered on 1 December 2011 when a woman using the lavatory heard it vibrating and removed it and showed it to her supervisor.", + "length": 175 + }, + { + "text": "Married Graham Bevan, 53, watched the 'explicit' footage on his home computer of three 
women in the bathroom of the Three Sisters and Bronte View residential home, in Haworth, West Yorks.", + "length": 187 + }, + { + "text": "'However, it transpired that when he saw the videos he was aroused and so he accepts he kept these on his computer system,' said Mr Mahmood, who added Bevan had been fired by the residential home.", + "length": 196 + }, + { + "text": "Voyeur: Married Graham Bevan, 53, watched the 'explicit' footage on his home computer of three women in the bathroom of the Three Sisters and Bronte View residential home, in Haworth, West Yorks .", + "length": 196 + }, + { + "text": "Bradford Crown Court heard Bevan, of Haworth, had worked at the residential home since 2007 and had secretly rigged up his Sony Ericsson mobile in the communal toilet and successfully recorded a number of colleagues.", + "length": 216 + }, + { + "text": "Today, a remorseful Bevan - who is 'ashamed and embarrassed' - was given a three-year supervision order, with a 9pm to 7am curfew, ordered to complete 100 hours unpaid work and sign the sex offenders' register for five years after pleading guilty to two counts of voyeurism and one of attempting to observe a person doing a private act, without their consent.", + "length": 359 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.539925754070282 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:50.569588551Z", + "first_section_created": "2025-12-23T09:32:50.571171315Z", + "last_section_published": "2025-12-23T09:32:50.571388824Z", + "all_results_received": "2025-12-23T09:32:50.641862349Z", + "output_generated": "2025-12-23T09:32:50.642029356Z", + "total_processing_time_ms": 72, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 70, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:50.571171315Z", + "publish_time": "2025-12-23T09:32:50.571388824Z", + "first_worker_start": 
"2025-12-23T09:32:50.571922745Z", + "last_worker_end": "2025-12-23T09:32:50.641Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:50.571919045Z", + "start_time": "2025-12-23T09:32:50.571994248Z", + "end_time": "2025-12-23T09:32:50.572072351Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:50.572159Z", + "start_time": "2025-12-23T09:32:50.572299Z", + "end_time": "2025-12-23T09:32:50.641Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 68 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:50.571873643Z", + "start_time": "2025-12-23T09:32:50.571929446Z", + "end_time": "2025-12-23T09:32:50.572014749Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:50.571868343Z", + "start_time": "2025-12-23T09:32:50.571922745Z", + "end_time": "2025-12-23T09:32:50.571975847Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 68, + "min_processing_ms": 68, + "max_processing_ms": 68, + "avg_processing_ms": 68, + "median_processing_ms": 68, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": 
"wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3337, + "slowest_section_id": 0, + "slowest_section_time_ms": 69 + } +} diff --git a/data/output/001a44a51e775311e97a68e7abe3a1f09298e208.json b/data/output/001a44a51e775311e97a68e7abe3a1f09298e208.json new file mode 100644 index 0000000..8dceb59 --- /dev/null +++ b/data/output/001a44a51e775311e97a68e7abe3a1f09298e208.json @@ -0,0 +1,440 @@ +{ + "file_name": "001a44a51e775311e97a68e7abe3a1f09298e208.txt", + "total_words": 972, + "top_n_words": [ + { + "word": "the", + "count": 50 + }, + { + "word": "a", + "count": 26 + }, + { + "word": "to", + "count": 19 + }, + { + "word": "of", + "count": 17 + }, + { + "word": "was", + "count": 17 + }, + { + "word": "and", + "count": 16 + }, + { + "word": "arsenal", + "count": 16 + }, + { + "word": "united", + "count": 16 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "as", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "The ball, when we SHOW!", + "length": 23 + }, + { + "text": "Is it a case of deja vu?", + "length": 24 + }, + { + "text": "Now, nobody is quite so sure.", + "length": 29 + }, + { + "text": "And they were absolutely right.", + "length": 31 + }, + { + "text": "But the finishing; ah, the finishing!", + "length": 37 + }, + { + "text": "As lessons go, it was utterly emphatic.", + "length": 39 + }, + { + "text": "But the summary had been perfectly accurate.", + "length": 44 + }, + { + "text": "Arsenal were dazzling in that early passage.", + "length": 44 + }, + { + "text": "Oxlade-Chamberlain was virtually unplayable.", + "length": 44 + }, + { + "text": "One imagines he might be a revelation at half-time.", + "length": 51 + }, + { + "text": "Wayne Rooney demonstrated it to clinical perfection.", + "length": 52 + }, + 
{ + "text": "Confidence, then we create big chances,’ he boomed.", + "length": 53 + }, + { + "text": "’ A pause: ‘Of course, you’ll know what that means.", + "length": 57 + }, + { + "text": "After half an hour, the match produced a revealing cameo.", + "length": 57 + }, + { + "text": "Within a minute, the first United goal provided the answer.", + "length": 59 + }, + { + "text": "As United flourished, Arsenal feared there was even worse ahead.", + "length": 64 + }, + { + "text": "The assumption used to be that he was thinking profound thoughts.", + "length": 65 + }, + { + "text": "The Emirates sensed a penalty would be paid, and they were right.", + "length": 65 + }, + { + "text": "But, by comparison, the defences were accidents waiting to happen.", + "length": 66 + }, + { + "text": "’ We went into the match aware that both clubs have known better days.", + "length": 72 + }, + { + "text": "Louis van Gaal was another manager who almost cracked a smile; with good reason.", + "length": 80 + }, + { + "text": "But the chest-thumping chants of United’s travelling circus told their own story.", + "length": 83 + }, + { + "text": "’ For half a second, the manager’s post-match face hovered on the brink of a smile.", + "length": 87 + }, + { + "text": "But, as he would later point out, the result is the starting point of all the inquests.", + "length": 87 + }, + { + "text": "With a scrappy challenge, Wilshere provoked a sadly unequal spat with Marouane Fellaini.", + "length": 88 + }, + { + "text": "We sensed it would not be Arsenal’s night, and they started to play as if they knew it.", + "length": 89 + }, + { + "text": "Wayne Rooney (right) scored Manchester United's second goal in their 2-1 win at Arsenal .", + "length": 89 + }, + { + "text": "You could almost touch the apprehension and Wilshere’s departure increased that tension.", + "length": 90 + }, + { + "text": "There was a hum of mutinous muttering as they trooped out of the Emirates on Saturday night.", + "length": 92 
+ }, + { + "text": "As the United player reacted, Wilshere thrust his head towards his opponent’s face, twice.", + "length": 92 + }, + { + "text": "United captain Rooney celebrates scoring his side's second in their victory on Saturday night .", + "length": 95 + }, + { + "text": "As they struggled through those early anxieties, Van Gaal sat in his dugout, looking inscrutable.", + "length": 97 + }, + { + "text": "Random words are barked in mid-sentence, startling the sleepy with their passion: ‘When we KEEP!", + "length": 98 + }, + { + "text": "Jack Wilshere (right) was lucky not to be sent off after appearing to headbutt Marouane Fellaini .", + "length": 98 + }, + { + "text": "And as they wasted chance upon chance, United sensed that there were points of their own to be made.", + "length": 100 + }, + { + "text": "Arsenal striker Danny Welbeck (left) spurned a great chance when his shot was blocked by Paddy McNair .", + "length": 103 + }, + { + "text": "United boss Louis van Gaal watched the first half pensively as Arsenal dominated the opening exchanges .", + "length": 104 + }, + { + "text": "Alex Oxlade-Chamberlain (centre) was guilty of careless finishing as Arsenal missed several opportunities .", + "length": 107 + }, + { + "text": "The result heaps pressure on boss Arsene Wenger as his side could have won the match inside the first half .", + "length": 108 + }, + { + "text": "But he was honest enough to admit: ‘If Arsenal had scored in the first 20 minutes, then it is another game.", + "length": 109 + }, + { + "text": "Arsenal's Kieran Gibbs (left) and Santi Cazorla look dejected after their 2-1 defeat against Manchester United .", + "length": 112 + }, + { + "text": "United were second best at pretty well everything, save the one virtue which counts for more than all the others.", + "length": 113 + }, + { + "text": "Rooney spent 85 minutes watching some of the most expensive attackers in English football miss chance upon chance.", + "length": 114 + }, + { + 
"text": "Had the big man been in theatrical mood, then Arsenal would unquestionably, and needlessly, have been reduced to 10.", + "length": 116 + }, + { + "text": "He made a truly forlorn spectacle as he limped away, and you wondered how his team would cope without his creativity.", + "length": 117 + }, + { + "text": "But United’s manager is a serious man, who delivers his messages with the controlled frenzy of a revivalist preacher.", + "length": 119 + }, + { + "text": "No strident protest, no outright rebellion, nothing so crude; but the frustration, the discontent were all too evident.", + "length": 119 + }, + { + "text": "By now, Rooney was more influential, Fellaini was an imposing target, while Di Maria cannot be subdued for half a match.", + "length": 120 + }, + { + "text": "Having fashioned sufficient chances in the first 25 minutes to have won half a dozen matches, Arsenal frittered them one by one.", + "length": 128 + }, + { + "text": "Welbeck was profligate, Oxlade-Chamberlain careless, while Wilshere treated his chances with the extravagance of a lottery winner in a casino.", + "length": 142 + }, + { + "text": "United were installed in the top four, while what might have been a night of solid Arsenal advancement became a slow decline into upper mid-table.", + "length": 146 + }, + { + "text": "The chance still cropped up, but there was an absence of conviction, optimism, all those qualities which teams at the top tend to take for granted.", + "length": 147 + }, + { + "text": "It was best expressed at Arsene Wenger’s press conference, when an earnest scribbler began: ‘Arsene: dominating teams and not finishing them off.", + "length": 149 + }, + { + "text": "Their pace was bewildering, their passing inspired, and all the doubts which United’s defenders were clearly nourishing came crowding remorselessly in.", + "length": 153 + }, + { + "text": "Wilshere brings many qualities to this team, but there is a truculent self-indulgence which threatens to prevent 
his becoming the kind of player he ought to be.", + "length": 160 + }, + { + "text": "Yet Rooney knew, Arsenal knew, everybody in this part of North London knew that Arsenal should have been home and hosed before United had even raised their voice.", + "length": 162 + }, + { + "text": "Attacking talent fairly flooded the field; Danny Welbeck, Alex Oxlade-Chamberlain and Jack Wilshere on one side, Wayne Rooney, Robin van Persie and Angel di Maria on the other.", + "length": 176 + }, + { + "text": "Then, with United breaking at pace and the Arsenal defence suicidally stretched, he accelerated on to a pass, changed from foot to foot, and raised a delicate chip beyond the goalkeeper.", + "length": 186 + }, + { + "text": "Time was when they debated the glittering prizes, now they tend to know their place; a couple of dowagers, squabbling over the dinner service while the men with new money make off with the deeds to the castle.", + "length": 209 + }, + { + "text": "Arsenal have long toiled to accommodate their suspect central defenders, but United’s injury list has pushed players like Paddy McNair and Tyler Blackett into situations which ideally would have been long delayed.", + "length": 215 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5097758173942566 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:51.072272101Z", + "first_section_created": "2025-12-23T09:32:51.074287882Z", + "last_section_published": "2025-12-23T09:32:51.074621295Z", + "all_results_received": "2025-12-23T09:32:51.166823691Z", + "output_generated": "2025-12-23T09:32:51.1670638Z", + "total_processing_time_ms": 94, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 92, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:51.074287882Z", + "publish_time": "2025-12-23T09:32:51.074512991Z", + "first_worker_start": "2025-12-23T09:32:51.075019211Z", + 
"last_worker_end": "2025-12-23T09:32:51.165902Z", + "total_journey_time_ms": 91, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:51.075117515Z", + "start_time": "2025-12-23T09:32:51.075185018Z", + "end_time": "2025-12-23T09:32:51.075295322Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:51.075401Z", + "start_time": "2025-12-23T09:32:51.075529Z", + "end_time": "2025-12-23T09:32:51.165902Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 90 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:51.074959509Z", + "start_time": "2025-12-23T09:32:51.075020211Z", + "end_time": "2025-12-23T09:32:51.075145016Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:51.074949108Z", + "start_time": "2025-12-23T09:32:51.075019211Z", + "end_time": "2025-12-23T09:32:51.075058913Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:51.074554992Z", + "publish_time": "2025-12-23T09:32:51.074621295Z", + "first_worker_start": "2025-12-23T09:32:51.075139216Z", + "last_worker_end": "2025-12-23T09:32:51.148075Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:51.075270721Z", + "start_time": "2025-12-23T09:32:51.075300222Z", + "end_time": "2025-12-23T09:32:51.075320023Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:51.075442Z", + "start_time": "2025-12-23T09:32:51.075578Z", + "end_time": "2025-12-23T09:32:51.148075Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": 
"2025-12-23T09:32:51.075110515Z", + "start_time": "2025-12-23T09:32:51.075139216Z", + "end_time": "2025-12-23T09:32:51.075170217Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:51.07524172Z", + "start_time": "2025-12-23T09:32:51.075271821Z", + "end_time": "2025-12-23T09:32:51.075371225Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 162, + "min_processing_ms": 72, + "max_processing_ms": 90, + "avg_processing_ms": 81, + "median_processing_ms": 90, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2915, + "slowest_section_id": 0, + "slowest_section_time_ms": 91 + } +} diff --git a/data/output/001a6162391594e2a8607fba135bdfa154e57904.json b/data/output/001a6162391594e2a8607fba135bdfa154e57904.json new file mode 100644 index 0000000..079b8da --- /dev/null +++ b/data/output/001a6162391594e2a8607fba135bdfa154e57904.json @@ -0,0 +1,294 @@ +{ + "file_name": "001a6162391594e2a8607fba135bdfa154e57904.txt", + 
"total_words": 529, + "top_n_words": [ + { + "word": "the", + "count": 25 + }, + { + "word": "to", + "count": 18 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "and", + "count": 14 + }, + { + "word": "of", + "count": 13 + }, + { + "word": "s", + "count": 11 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "dha", + "count": 9 + }, + { + "word": "alzheimer", + "count": 8 + }, + { + "word": "that", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "D.", + "length": 2 + }, + { + "text": "D.", + "length": 2 + }, + { + "text": "D.", + "length": 2 + }, + { + "text": "Health.", + "length": 7 + }, + { + "text": "Health.", + "length": 7 + }, + { + "text": "(Health.", + "length": 8 + }, + { + "text": ") Health.", + "length": 9 + }, + { + "text": "Joseph F.", + "length": 9 + }, + { + "text": "Quinn, M.", + "length": 9 + }, + { + "text": "Ferris, Ph.", + "length": 11 + }, + { + "text": "Kristine Yaffe, M.", + "length": 18 + }, + { + "text": "Copyright Health Magazine 2011 .", + "length": 32 + }, + { + "text": "com: 9 foods that may help save your memory .", + "length": 45 + }, + { + "text": "com: Fish oil doesn't benefit new moms, babies .", + "length": 48 + }, + { + "text": "com: 25 signs and symptoms of Alzheimer's disease .", + "length": 51 + }, + { + "text": "\"Unfortunately, that seems to be the situation here.", + "length": 52 + }, + { + "text": "Research on DHA has been inconclusive and sometimes conflicting.", + "length": 64 + }, + { + "text": ", a neurologist at the Oregon Health and Science University, in Portland.", + "length": 73 + }, + { + "text": ", a professor of psychiatry at the University of California, San Francisco.", + "length": 75 + }, + { + "text": "\" The study was funded by the National Institute on Aging and was led by Dr.", + "length": 76 + }, + { + "text": ", the director of the Aging and Dementia Research Center at New York University.", + "length": 80 + }, + { + "text": "Treatments such as DHA may be too little too late 
for people who are already showing signs of Alzheimer's, according to Dr.", + "length": 123 + }, + { + "text": "(The other main ingredient found in fish oil, eicosapentaenoic acid, or EPA, is not believed to play a significant role in brain health.", + "length": 136 + }, + { + "text": "Despite the disappointing results, the study doesn't entirely rule out the possibility that DHA may have some benefit if taken earlier in life.", + "length": 143 + }, + { + "text": "DHA or fish-oil supplements aren't likely to cause any harm to Alzheimer's patients, but they aren't likely to do any good either, says Steven H.", + "length": 145 + }, + { + "text": "After 18 months, the average mental decline in the DHA and placebo groups was nearly identical, as measured on two separate tests and rating scales.", + "length": 148 + }, + { + "text": "\"Effective treatment strategies to prevent progression of [Alzheimer's disease] will likely need to be initiated earlier,\" Yaffe writes in an editorial accompanying the study.", + "length": 175 + }, + { + "text": "A growing body of research suggests that dementia begins decades before any noticeable symptoms surface, and it's possible that DHA helps prevent or slow those harmful changes.", + "length": 176 + }, + { + "text": "Fish oil \"seems to be healthy in general, and maybe for other things it's helpful, but it doesn't benefit cognitive function in a person with Alzheimer's,\" says Ferris, who was not involved in the study.", + "length": 203 + }, + { + "text": "com) -- An essential nutrient found in fish oil does not appear to slow the mental decline associated with Alzheimer's disease, according to a new study in the Journal of the American Medical Association.", + "length": 204 + }, + { + "text": "The study is merely the latest to cast doubt on the mental benefits of the omega-3 fatty acid docosahexaenoic acid (DHA), which until recently was considered a promising way to minimize the risk and damage of dementia.", + "length": 218 + }, 
+ { + "text": "Quinn and his colleagues randomly assigned about 400 women and men in their mid-70s with likely Alzheimer's disease -- the disease is very difficult to accurately diagnose -- to take 2 grams of DHA or placebo capsules per day.", + "length": 226 + }, + { + "text": "Several studies that followed large groups of people as they aged have suggested that a diet rich in fish is linked to a reduced risk of dementia and mental decline, but most randomized controlled trials comparing DHA supplements with placebo have found no benefit.", + "length": 265 + }, + { + "text": "\"It's not the first time something in large epidemiological datasets just didn't work out clinically,\" Ferris says, noting that statin medications, anti-inflammatory drugs, and estrogen therapy have all failed to live up to their initial promise in preventing or treating Alzheimer's.", + "length": 284 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5096400380134583 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:51.575413568Z", + "first_section_created": "2025-12-23T09:32:51.577439049Z", + "last_section_published": "2025-12-23T09:32:51.577619256Z", + "all_results_received": "2025-12-23T09:32:51.637805668Z", + "output_generated": "2025-12-23T09:32:51.637981876Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:51.577439049Z", + "publish_time": "2025-12-23T09:32:51.577619256Z", + "first_worker_start": "2025-12-23T09:32:51.578114876Z", + "last_worker_end": "2025-12-23T09:32:51.636869Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:51.578056174Z", + "start_time": "2025-12-23T09:32:51.578126476Z", + "end_time": "2025-12-23T09:32:51.57820618Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:51.578298Z", + "start_time": "2025-12-23T09:32:51.578445Z", + "end_time": "2025-12-23T09:32:51.636869Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:51.578059174Z", + "start_time": "2025-12-23T09:32:51.578121076Z", + "end_time": "2025-12-23T09:32:51.578197279Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:51.578053273Z", + "start_time": "2025-12-23T09:32:51.578114876Z", + "end_time": "2025-12-23T09:32:51.578156178Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3147, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git 
a/data/output/001a6d2f631a3d39f67e1c10a220cd7e3f4e2e9a.json b/data/output/001a6d2f631a3d39f67e1c10a220cd7e3f4e2e9a.json new file mode 100644 index 0000000..85527c4 --- /dev/null +++ b/data/output/001a6d2f631a3d39f67e1c10a220cd7e3f4e2e9a.json @@ -0,0 +1,282 @@ +{ + "file_name": "001a6d2f631a3d39f67e1c10a220cd7e3f4e2e9a.txt", + "total_words": 454, + "top_n_words": [ + { + "word": "the", + "count": 28 + }, + { + "word": "i", + "count": 12 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "on", + "count": 11 + }, + { + "word": "be", + "count": 9 + }, + { + "word": "year", + "count": 9 + }, + { + "word": "a", + "count": 8 + }, + { + "word": "for", + "count": 8 + }, + { + "word": "back", + "count": 7 + }, + { + "word": "s", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "Me and .", + "length": 8 + }, + { + "text": "'I'd like to .", + "length": 14 + }, + { + "text": "5million deal.", + "length": 14 + }, + { + "text": "Stay tuned as I .", + "length": 17 + }, + { + "text": "I've had a great time on .", + "length": 26 + }, + { + "text": "It might be Mel B, I don't know!", + "length": 32 + }, + { + "text": "'But we're just going to have to see.", + "length": 37 + }, + { + "text": "have a very exciting announcement soon!", + "length": 39 + }, + { + "text": "But I think it probably will be Nicole.", + "length": 39 + }, + { + "text": "won't be part of the X Factor panel this year.", + "length": 46 + }, + { + "text": "helped to get her back, because she's great fun.", + "length": 48 + }, + { + "text": "He told RTE Ten: 'I'm back, I am definitely back.", + "length": 49 + }, + { + "text": "She wrote: 'Just wanted to let you all know that I .", + "length": 52 + }, + { + "text": "He added: 'I hope it's going to be Tulisa or Nicole.", + "length": 52 + }, + { + "text": "thank everyone on The X Factor for two amazing years.", + "length": 53 + }, + { + "text": "She said: 'I'm definitely dancing around with the idea.", + "length": 55 + }, + { + "text": "the show, but 
this year it's time to do something different.", + "length": 60 + }, + { + "text": "Gary, we're the bookends and I'm sure Sharon Osbourne's back and I .", + "length": 68 + }, + { + "text": "' Earlier this month, reports claimed Sharon was being lured back with a £1.", + "length": 77 + }, + { + "text": "Her little muffins: Tulisa won The X Factor in 2011 with her girl group Little Mix .", + "length": 84 + }, + { + "text": "Tulisa Contostavlos has confirmed speculation she will not be returning to The X Factor judging panel.", + "length": 102 + }, + { + "text": "Big decision: Tulisa revealed she wouldn't be returning to The X Factor for a third series on Tuesday .", + "length": 103 + }, + { + "text": "Moving on: Tulisa Contostavlos, pictured at the Sony Radio Academy Awards last year, is leaving The X Factor .", + "length": 110 + }, + { + "text": "Back for more: Gary Barlow, Louis Walsh and Nicole Scherzinger will all be be back on the panel for this year's show .", + "length": 118 + }, + { + "text": "The Irish music manager also gave a strong hint that Sharon Osbourne - who left the panel after the 2007 series - was returning.", + "length": 128 + }, + { + "text": "The former N-Dubz singer, 24, announced the news on her Twitter page on Tuesday, declaring it was 'time to do something different'.", + "length": 131 + }, + { + "text": "However, Louis was in the dark over whether or not Nicole Scherzinger would return after she joined last year as a replacement for Kelly Rowland.", + "length": 145 + }, + { + "text": "The pop star's announcement follows months of speculation Simon Cowell was looking to shake up the judging panel this year after a slump in viewing figures.", + "length": 156 + }, + { + "text": "' Tulisa's decision to depart following two years on the ITV talent show comes as veteran judge Louis Walsh confirmed he would be returning alongside Gary Barlow.", + "length": 162 + }, + { + "text": "' In an interview with the MailOnline in March, Nicole admitted she 
hadn't made up her mind about returning following a year on the UK series and the previous year on the US version.", + "length": 182 + }, + { + "text": "The show was one of my greatest highs from next year, having two of my contestants in the next two for the first time ever on the show was quite an accomplishment for me and for my boys.", + "length": 186 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.45982348918914795 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:52.077029681Z", + "first_section_created": "2025-12-23T09:32:52.078279231Z", + "last_section_published": "2025-12-23T09:32:52.078439638Z", + "all_results_received": "2025-12-23T09:32:52.142419902Z", + "output_generated": "2025-12-23T09:32:52.142605109Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:52.078279231Z", + "publish_time": "2025-12-23T09:32:52.078439638Z", + "first_worker_start": "2025-12-23T09:32:52.078983459Z", + "last_worker_end": "2025-12-23T09:32:52.141554Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:52.078954558Z", + "start_time": "2025-12-23T09:32:52.079020861Z", + "end_time": "2025-12-23T09:32:52.079078663Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:52.079154Z", + "start_time": "2025-12-23T09:32:52.079287Z", + "end_time": "2025-12-23T09:32:52.141554Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:52.078957258Z", + "start_time": "2025-12-23T09:32:52.079025261Z", + "end_time": "2025-12-23T09:32:52.079123765Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + 
"wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:52.078917157Z", + "start_time": "2025-12-23T09:32:52.078983459Z", + "end_time": "2025-12-23T09:32:52.079031361Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2420, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/001a8b20279cb0b829cdb4fa8cd6997949c33f09.json b/data/output/001a8b20279cb0b829cdb4fa8cd6997949c33f09.json new file mode 100644 index 0000000..a0da669 --- /dev/null +++ b/data/output/001a8b20279cb0b829cdb4fa8cd6997949c33f09.json @@ -0,0 +1,384 @@ +{ + "file_name": "001a8b20279cb0b829cdb4fa8cd6997949c33f09.txt", + "total_words": 949, + "top_n_words": [ + { + "word": "the", + "count": 48 + }, + { + "word": "her", + "count": 41 + }, + { + "word": "to", + "count": 34 + }, + { + "word": "and", + "count": 27 + }, 
+ { + "word": "of", + "count": 26 + }, + { + "word": "in", + "count": 23 + }, + { + "word": "a", + "count": 22 + }, + { + "word": "she", + "count": 17 + }, + { + "word": "was", + "count": 13 + }, + { + "word": "for", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "ua.", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "'Look at me.", + "length": 12 + }, + { + "text": "That’s all.", + "length": 13 + }, + { + "text": "Simon Tomlinson .", + "length": 17 + }, + { + "text": "and Jill Reilly .", + "length": 17 + }, + { + "text": "and Chris Parsons .", + "length": 19 + }, + { + "text": "I thought I had died.", + "length": 21 + }, + { + "text": "SCROLL DOWN FOR VIDEO .", + "length": 23 + }, + { + "text": "' and 'Execute the b****s!", + "length": 26 + }, + { + "text": "In an interview with Gazeta.", + "length": 28 + }, + { + "text": "Do I look like a rich woman?", + "length": 28 + }, + { + "text": "I remember it all as if in a fog.", + "length": 33 + }, + { + "text": "At first, I was screaming for help.", + "length": 35 + }, + { + "text": "YouTube video calmly describing the attack.", + "length": 43 + }, + { + "text": "'But there was nobody there, I began to pray.", + "length": 45 + }, + { + "text": "ua after the attack, she said: 'I could not feel my body.", + "length": 57 + }, + { + "text": "' she said in an interview with a Ukrainian website, Gazeta.", + "length": 60 + }, + { + "text": "After undergoing surgery, she was finally able to recount her ordeal to police.", + "length": 79 + }, + { + "text": "Miss Makar was found barely alive the following day by a man who heard her moaning.", + "length": 83 + }, + { + "text": "She starts to describe the attack, but then breaks off looking visibly upset and says she can’t.", + "length": 98 + }, + { + "text": "' Miss Makar starts to describe the attack, but then breaks off looking visibly upset and says she can’t.", + "length": 107 + }, + { + "text": "When her mother asks her how she will 
survive, the blonde-headed young woman says: 'How I will live, I will live.", + "length": 113 + }, + { + "text": "Tragic: Oksana Makar claimed before she died she was gang-raped, dumped in a ditch and set on fire by three men .", + "length": 113 + }, + { + "text": "Prosecutors say they did not question Miss Makar because she was not conscious despite her mother insisting she was able to speak.", + "length": 130 + }, + { + "text": "But after a two-week fight for life, health officials in Donetsk said Oksana, 18, died today of severe burns and damage to her lungs.", + "length": 133 + }, + { + "text": "A Ukrainian teen who was gang-raped, dumped in a ditch and set on fire has died days after she posted a tragic online video revealing her ordeal.", + "length": 145 + }, + { + "text": "She regained consciousness on Friday, but doctors induced a coma and moved her from a hospital in Mykolayiv to a burn treatment center in Donetsk.", + "length": 146 + }, + { + "text": "The 18-year-old claims she was invited by Prisyjnikov and Pogosyan to their friend Yevgeniy Krasnoshek's apartment, where she was allegedly raped.", + "length": 146 + }, + { + "text": "Women's rights group Femen staged a topless protest in front of the general prosecutors office in the capital, Kyiv, chanting 'Death to the sadists!", + "length": 148 + }, + { + "text": "Ms Surovitska has already been accused of spending fundraising money for her daughter's care on herself, as well as charging reporters for interviews.", + "length": 150 + }, + { + "text": "' The three suspects were arrested, but two of them were released later that day due to lack of evidence, according to the Mykolayiv prosecutor’s office.", + "length": 155 + }, + { + "text": "Some Members of Parliament have called for Ukraine to bring in the death penalty for the accused, with one even suggesting the alleged rapists be castrated.", + "length": 156 + }, + { + "text": "When questioned as to what punishment her attackers should receive after 
the March 9 attack, the teen replies their testicles should be cut off and fed to dogs.", + "length": 160 + }, + { + "text": "Oksana Makar bravely told from her hospital bed how she was raped, strangled and burned by three men in a savage attack which stripped off 55 per cent of her skin.", + "length": 163 + }, + { + "text": "Her harrowing hospital video, filmed by her mother, emerged after medics had been forced to amputate one of her arms and both her feet in a battle to keep her alive.", + "length": 165 + }, + { + "text": "Two of the suspects were Maxim Prisyjnikov, 23, the son of the regional administrator, and Artyon Pogosyan, 21, the son of the regional prosecutor, according to the Hurriyet Daily News.", + "length": 185 + }, + { + "text": "The fire stripped off 55 per cent of her skin and surgeons had to amputate one of her arms and both her feet as they battled to keep her alive - her bandaged shoulder can be seen on her right side.", + "length": 197 + }, + { + "text": "In the 1:19 minute video clip which emerged earlier this month, Oksana looks distraught as her mother, Tetyana Surovitska, urges her to describe her horrific ordeal by saying 'a couple of words to Ukraine'.", + "length": 206 + }, + { + "text": "The shocking case sparked mass protests across Ukraine and naked campaigners led protests over the sloppy nature of the police investigation and the perceived leniency with which the suspects have been treated.", + "length": 210 + }, + { + "text": "The rape attack on Oksana shocked Ukraine with its cruelty, and also caused nationwide protests against corruption after police initially released two of the suspects who had powerful connections in the region.", + "length": 210 + }, + { + "text": "' The demonstrations led Ukrainian President Yanukovych to order the general prosecutor to oversee the case and on March 13 the two men were re-arrested and all three suspects were charged with attempted murder.", + "length": 211 + }, + { + "text": "' Her mother 
has been criticised for posting the video online with many users suggesting it was insensitive to her daughter asking her to recount her harrowing experience publicly when 'she is clearly distraught and in pain.", + "length": 224 + }, + { + "text": "Bare necessities: Women's rights group Femen protest naked outside the state prosecutor's office in the Ukrainian capital of Kiev to demand tough punishment for three men accused of raping and set alight an 18-year-old woman .", + "length": 226 + }, + { + "text": "Outcry: Campaigners were also angry at the perceived leniency handed down to the arrested men, two of whom were initially released over 'lack of evidence' They tried to strangle her to death before wrapping her in a sheet, dumping her in a pit at a construction site and setting her alight, it was alleged in the Kyiv Post.", + "length": 323 + }, + { + "text": "Harrowing: In the video, Oksana Makar, 18, looks in pain as her mother, Tetyana Surovitska, urges her to describe her horrific ordeal by saying 'a couple of words to Ukraine' Painful: The fire stripped off 55 per cent of her skin and surgeons had to amputate one of her arms and both her feet as they tried and eventually failed to keep her alive .", + "length": 348 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.820906788110733 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:52.579189008Z", + "first_section_created": "2025-12-23T09:32:52.57948782Z", + "last_section_published": "2025-12-23T09:32:52.579814233Z", + "all_results_received": "2025-12-23T09:32:52.679150215Z", + "output_generated": "2025-12-23T09:32:52.679369424Z", + "total_processing_time_ms": 100, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 99, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:52.57948782Z", + "publish_time": "2025-12-23T09:32:52.579703029Z", + "first_worker_start": 
"2025-12-23T09:32:52.580366256Z", + "last_worker_end": "2025-12-23T09:32:52.678303Z", + "total_journey_time_ms": 98, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:52.580340355Z", + "start_time": "2025-12-23T09:32:52.580399857Z", + "end_time": "2025-12-23T09:32:52.580517262Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:52.5805Z", + "start_time": "2025-12-23T09:32:52.580619Z", + "end_time": "2025-12-23T09:32:52.678303Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 97 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:52.580381656Z", + "start_time": "2025-12-23T09:32:52.580463459Z", + "end_time": "2025-12-23T09:32:52.580569964Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:52.580277352Z", + "start_time": "2025-12-23T09:32:52.580366256Z", + "end_time": "2025-12-23T09:32:52.580417258Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:52.579755831Z", + "publish_time": "2025-12-23T09:32:52.579814233Z", + "first_worker_start": "2025-12-23T09:32:52.580391357Z", + "last_worker_end": "2025-12-23T09:32:52.646696Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:52.580340855Z", + "start_time": "2025-12-23T09:32:52.580391357Z", + "end_time": "2025-12-23T09:32:52.580403457Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:52.580516Z", + "start_time": "2025-12-23T09:32:52.580619Z", + "end_time": "2025-12-23T09:32:52.646696Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + 
"receive_time": "2025-12-23T09:32:52.580424758Z", + "start_time": "2025-12-23T09:32:52.580463859Z", + "end_time": "2025-12-23T09:32:52.58048366Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:52.580384256Z", + "start_time": "2025-12-23T09:32:52.580414557Z", + "end_time": "2025-12-23T09:32:52.580429658Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 163, + "min_processing_ms": 66, + "max_processing_ms": 97, + "avg_processing_ms": 81, + "median_processing_ms": 97, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2712, + "slowest_section_id": 0, + "slowest_section_time_ms": 98 + } +} diff --git a/data/output/001aae426b4742676f9d944ad7cb08c07190035a.json b/data/output/001aae426b4742676f9d944ad7cb08c07190035a.json new file mode 100644 index 0000000..324e0ee --- /dev/null +++ b/data/output/001aae426b4742676f9d944ad7cb08c07190035a.json @@ -0,0 +1,270 @@ +{ + "file_name": 
"001aae426b4742676f9d944ad7cb08c07190035a.txt", + "total_words": 550, + "top_n_words": [ + { + "word": "the", + "count": 50 + }, + { + "word": "a", + "count": 30 + }, + { + "word": "of", + "count": 21 + }, + { + "word": "public", + "count": 19 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "is", + "count": 11 + }, + { + "word": "tanker", + "count": 9 + }, + { + "word": "and", + "count": 6 + }, + { + "word": "indian", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "' But one woman wrote: 'Hilarious!", + "length": 34 + }, + { + "text": "One user wrote: 'This is not the solution.", + "length": 42 + }, + { + "text": "Above, a man urinates against a wall in Mumbai .", + "length": 48 + }, + { + "text": "Above, a masked vigilante sprays water at a member of the public .", + "length": 66 + }, + { + "text": "Above, an Indian man caught urinating in the street is blasted by the vigilantes .", + "length": 82 + }, + { + "text": "' And another added: 'This is a good way to teach them (public urinators) to use public toilets!", + "length": 96 + }, + { + "text": "Meanwhile, another said: 'That's degrading for those men even if they are doing something wrong.", + "length": 96 + }, + { + "text": "It also includes a rooftop area, from which the masked vigilantes can spray members of the public .", + "length": 99 + }, + { + "text": "The Clean Indian anti-public urination activists have a novel way of punishing men who p*** in public.", + "length": 102 + }, + { + "text": "Lack of availability of Public toilets is the problem that is causing general public to act like that.", + "length": 102 + }, + { + "text": "Action: Vigilantes have been patrolling Mumbai with a water tanker - and spraying anyone who pees in public .", + "length": 109 + }, + { + "text": "Aim: It aims to deter those who feel no shame relieving themselves in the streets of India's most populous 
city .", + "length": 113 + }, + { + "text": "On the lookout: The footage has divided opinion, with many users saying a lack of public toilets is the problem .", + "length": 113 + }, + { + "text": "Vigilantes have been patrolling the Indian city with a water tanker - and spraying anyone who urinates in public.", + "length": 113 + }, + { + "text": "Mid-flow: A motorcyclist caught using the streets of the Indian city as a public toilet is blasted by the activists .", + "length": 117 + }, + { + "text": "On patrol: The tanker, dubbed the 'P***ing Tanker', features an enormous water canon and a logo of a person urinating.", + "length": 118 + }, + { + "text": "For those caught using the streets of Mumbai as a public toilet, the pavement is not the only thing to face getting wet.", + "length": 120 + }, + { + "text": "It also includes a large rooftop area, from which the masked vigilantes can blast unsuspecting members of the public with water.", + "length": 128 + }, + { + "text": "The bright yellow tanker is believed to be the creation of an anonymous 'anti-public urination activist group', called the Clean Indian.", + "length": 136 + }, + { + "text": "Soaked: The bright yellow tanker is believed to be the creation of an anonymous 'anti-public urination activist group', called the Clean Indian.", + "length": 144 + }, + { + "text": "The vehicle, dubbed the 'P***ing Tanker', features a huge water canon, a logo of a person urinating and a large red sign, reading: 'You Stop, We Stop'.", + "length": 151 + }, + { + "text": "It also hopes to combat the health risk of public urination - which the activists claim remains a major problem in the country - 'one spray at a time'.", + "length": 151 + }, + { + "text": "Taking aim: The group has posted a video of the tanker on YouTube, saying: 'You wont' believe what we did to people using the outdoors as a toilet' (sic).", + "length": 154 + }, + { + "text": "It aims to deter those who feel no shame relieving themselves in the streets of 
India's most populous city by turning the tables and 'taking a leak' on them.", + "length": 157 + }, + { + "text": "The group has posted a video of the tanker at work on YouTube, with the caption: 'You wont' believe what we did to people using the outdoors as a toilet' (sic).", + "length": 160 + }, + { + "text": "' Health risk: It also hopes to combat the health risk of public urination - which the activists claim remains a major problem in the country - 'one spray at a time'.", + "length": 166 + }, + { + "text": "But the footage has divided opinion - with many users claiming that a lack of public toilets in the state of Maharashtra is the underlying problem, not those peeing in the street.", + "length": 179 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5316675901412964 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:53.080874117Z", + "first_section_created": "2025-12-23T09:32:53.081181929Z", + "last_section_published": "2025-12-23T09:32:53.081398038Z", + "all_results_received": "2025-12-23T09:32:53.148207016Z", + "output_generated": "2025-12-23T09:32:53.148413824Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:53.081181929Z", + "publish_time": "2025-12-23T09:32:53.081398038Z", + "first_worker_start": "2025-12-23T09:32:53.081876957Z", + "last_worker_end": "2025-12-23T09:32:53.147205Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:53.081882257Z", + "start_time": "2025-12-23T09:32:53.08194466Z", + "end_time": "2025-12-23T09:32:53.082001062Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:53.082103Z", + "start_time": 
"2025-12-23T09:32:53.082286Z", + "end_time": "2025-12-23T09:32:53.147205Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:53.081862456Z", + "start_time": "2025-12-23T09:32:53.081934159Z", + "end_time": "2025-12-23T09:32:53.082008862Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:53.081823555Z", + "start_time": "2025-12-23T09:32:53.081876957Z", + "end_time": "2025-12-23T09:32:53.081908558Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3144, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/001adf6209be103cb198b8599f236b4d5760a5fe.json b/data/output/001adf6209be103cb198b8599f236b4d5760a5fe.json new file mode 100644 
index 0000000..1b494e9 --- /dev/null +++ b/data/output/001adf6209be103cb198b8599f236b4d5760a5fe.json @@ -0,0 +1,202 @@ +{ + "file_name": "001adf6209be103cb198b8599f236b4d5760a5fe.txt", + "total_words": 133, + "top_n_words": [ + { + "word": "the", + "count": 7 + }, + { + "word": "s", + "count": 5 + }, + { + "word": "drama", + "count": 4 + }, + { + "word": "of", + "count": 4 + }, + { + "word": "a", + "count": 3 + }, + { + "word": "and", + "count": 3 + }, + { + "word": "meloni", + "count": 3 + }, + { + "word": "on", + "count": 3 + }, + { + "word": "to", + "count": 3 + }, + { + "word": "chris", + "count": 2 + } + ], + "sorted_sentences": [ + { + "text": "(EW.", + "length": 4 + }, + { + "text": "com.", + "length": 4 + }, + { + "text": "All rights reserved.", + "length": 20 + }, + { + "text": "See the full article at EW.", + "length": 27 + }, + { + "text": "\" He will be a series regular.", + "length": 30 + }, + { + "text": "© 2011 Entertainment Weekly and Time Inc.", + "length": 42 + }, + { + "text": "CLICK HERE to Try 2 RISK FREE issues of Entertainment Weekly .", + "length": 62 + }, + { + "text": "com) -- Chris Meloni has booked his first post-\"SVU\" gig -- and it's a bloody good one.", + "length": 87 + }, + { + "text": "Earlier this year, Meloni decided to step down from playing Detective Elliot Stabler on the long-running NBC drama.", + "length": 115 + }, + { + "text": "The former star of the Dick Wolf drama will join HBO's \"True Blood\" in season 5 as \"ancient, powerful vampire who holds the fate of Bill and Eric in his hands.", + "length": 159 + }, + { + "text": "Meloni's role on Alan Ball's drama marks a homecoming, of sorts: The actor previously played Chris Keller on the pay cabler's gritty drama \"Oz\" from 1998 to 2003.", + "length": 162 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4580982029438019 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:53.582225312Z", + "first_section_created": 
"2025-12-23T09:32:53.584295195Z", + "last_section_published": "2025-12-23T09:32:53.584503003Z", + "all_results_received": "2025-12-23T09:32:53.655214237Z", + "output_generated": "2025-12-23T09:32:53.655309141Z", + "total_processing_time_ms": 73, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 70, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:53.584295195Z", + "publish_time": "2025-12-23T09:32:53.584503003Z", + "first_worker_start": "2025-12-23T09:32:53.584885718Z", + "last_worker_end": "2025-12-23T09:32:53.654266Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:53.58492442Z", + "start_time": "2025-12-23T09:32:53.584986922Z", + "end_time": "2025-12-23T09:32:53.585010723Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:53.585324Z", + "start_time": "2025-12-23T09:32:53.585453Z", + "end_time": "2025-12-23T09:32:53.654266Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 68 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:53.58492212Z", + "start_time": "2025-12-23T09:32:53.584991823Z", + "end_time": "2025-12-23T09:32:53.585028924Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:53.584837516Z", + "start_time": "2025-12-23T09:32:53.584885718Z", + "end_time": "2025-12-23T09:32:53.584914319Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + 
"worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 68, + "min_processing_ms": 68, + "max_processing_ms": 68, + "avg_processing_ms": 68, + "median_processing_ms": 68, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 719, + "slowest_section_id": 0, + "slowest_section_time_ms": 69 + } +} diff --git a/data/output/001b4673dbb3437282cd2ea58d9eca471e25780f.json b/data/output/001b4673dbb3437282cd2ea58d9eca471e25780f.json new file mode 100644 index 0000000..11ada3c --- /dev/null +++ b/data/output/001b4673dbb3437282cd2ea58d9eca471e25780f.json @@ -0,0 +1,342 @@ +{ + "file_name": "001b4673dbb3437282cd2ea58d9eca471e25780f.txt", + "total_words": 789, + "top_n_words": [ + { + "word": "the", + "count": 46 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "filin", + "count": 16 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "s", + "count": 14 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "police", + "count": 11 + }, + { + "word": "for", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "com .", + "length": 5 + }, + { + "text": "Act 4 - A Moscow Court .", + "length": 24 + }, + { + "text": "Read the latest news on CNN.", + "length": 28 + }, + { + "text": "Act 1 - The Bolshoi Theatre .", + "length": 29 + }, + { + "text": "Someone slashed his car tires.", + "length": 30 + }, + 
{ + "text": "Act 2 - The Streets of Moscow .", + "length": 31 + }, + { + "text": "Act 3 - Moscow Police Headquarters .", + "length": 36 + }, + { + "text": "Bolshoi Prima ballerina's grace under pressure .", + "length": 48 + }, + { + "text": "Somebody also attempted to hack his Facebook page.", + "length": 50 + }, + { + "text": "It caused third-degree burns and left him blinded.", + "length": 50 + }, + { + "text": "\"I just took Yuri there, waited for him and gave him a lift back.", + "length": 65 + }, + { + "text": "Russian police question suspect in Bolshoi director's acid attack .", + "length": 67 + }, + { + "text": "It may take at least six months for Filin to recover from the burns.", + "length": 68 + }, + { + "text": "His colleagues are now working to ensure his artistic vision isn't lost.", + "length": 72 + }, + { + "text": "Calls from those phones led investigators to Lipatov, the alleged driver.", + "length": 73 + }, + { + "text": "But beyond professional disagreements, sinister factors were also at play.", + "length": 74 + }, + { + "text": "That year, two dancers quit, unhappy with the direction the ballet had taken.", + "length": 77 + }, + { + "text": "Police declared their case was solved this week with a confession by Dmitrichenko .", + "length": 83 + }, + { + "text": "\"I didn't see what happened there,\" Lipatov told them in a video released by police.", + "length": 84 + }, + { + "text": "In the meantime, Galina Stepanenko, a former principal dancer, will run the company.", + "length": 84 + }, + { + "text": "The characters in this drama include Dmitrichenko's girlfriend, Anzhelina Vorontsova.", + "length": 85 + }, + { + "text": "Tsiskaridze, incidentally, was also a contender for the artistic director job that Filin got.", + "length": 93 + }, + { + "text": "Detectives pored over interviews with those who knew Filin and had suspicions about who would harm him.", + "length": 103 + }, + { + "text": "\" When an interrogator asked Zarutsky about the crime, 
he allegedly said, \"I don't want to talk about it.", + "length": 105 + }, + { + "text": "There was \"fierce rivalry\" for the Bolshoi position at the time, according to the RIA Novosti news agency.", + "length": 106 + }, + { + "text": "Doctors performed a skin graft on Filin and, after a second eye surgery, they were able to save his sight.", + "length": 106 + }, + { + "text": "\"I organized this attack but not to the extent that it happened,\" he is heard saying in a video released by police.", + "length": 115 + }, + { + "text": "The three men faced a judge Thursday, who ordered that they be kept in police custody until the investigation is over.", + "length": 118 + }, + { + "text": "RIA Novosti reported that before the attack, Filin suffered months of intimidation, including threatening phone calls.", + "length": 118 + }, + { + "text": "If convicted, the attackers could face up to eight years in jail for willfully inflicting damage on the health of another.", + "length": 122 + }, + { + "text": "\" Police concluded a \"hostile relationship\" stemming from Dmitrichenko's professional interaction with Filin was his motive.", + "length": 124 + }, + { + "text": "While the final act must still play out in a Russian courtroom, the story opens in the nearly two-century-old Bolshoi Theatre.", + "length": 126 + }, + { + "text": "Another dancer, Nikolai Tsiskaridze, loudly criticized Filin for going over budget in the ballet's multimllion-dollar renovation.", + "length": 129 + }, + { + "text": "Other clues led them to several cell phones that Dmitrichenko had registered in other people's names, according to a police statement.", + "length": 134 + }, + { + "text": "She says she's going to follow Filin's plans and she believes the dancers will now be united by greater respect and care for each other.", + "length": 136 + }, + { + "text": "Ballet is a world where competition is fierce, and where the artistic director wields considerable influence in making or breaking 
careers.", + "length": 139 + }, + { + "text": "The trio of conspirators obtained battery acid at a car parts store, and made the acid stronger by evaporating the water from it, police said.", + "length": 142 + }, + { + "text": "It might well send Hollywood literary agents and producers scrambling for story details as described by police reports and local media accounts.", + "length": 144 + }, + { + "text": "She has not been charged, but local newspapers quote ballet members as saying Dmitrichenko was angry because he thought Filin was stifling her career.", + "length": 150 + }, + { + "text": "Dmitrichenko, who studied Filin's schedule, called Lipatov and Zarutsky when he saw Filin leave the theater on the cold night of January 17, police said.", + "length": 153 + }, + { + "text": "As Filin entered the security code at the door of his Moscow apartment, authorities say, Zarutsky confronted him and tossed the sulfuric acid into his face.", + "length": 156 + }, + { + "text": "Ballet dancer Pavel Dmitrichenko, often cast as the villain in Bolshoi Ballet productions, is now the lead defendant in a plot worthy of a Tchaikovsky score.", + "length": 157 + }, + { + "text": "The 29-year-old allegedly choreographed an attack intended to blind Bolshoi artistic director Sergei Filin, the man who put him in the roles of Ivan the Terrible and Swan Lake's evil genius.", + "length": 190 + }, + { + "text": "The mystery of who threw sulfuric acid into Filin's face in January has captivated Russians and kept Moscow detectives busy probing rivalries within Russia's renowned 240-year-old ballet company.", + "length": 195 + }, + { + "text": "Filin, 42, was promoted to the Bolshoi Theatre's coveted post in March 2011, shortly after the deputy ballet director, Gennady Yanin, who was widely seen as a favorite for the artistic director post, resigned when pornographic pictures of him surfaced online.", + "length": 259 + }, + { + "text": "Two alleged co-conspirators have been detained: Alleged 
hit man Yuri Zarutsky -- a burly, bearded Russian who was previously convicted of beating someone to death -- and Andrey Lipatov, who allegedly drove the getaway car after Zarutksy's battery acid attack on Filin.", + "length": 268 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5641673803329468 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:54.085283575Z", + "first_section_created": "2025-12-23T09:32:54.085588087Z", + "last_section_published": "2025-12-23T09:32:54.085839297Z", + "all_results_received": "2025-12-23T09:32:54.153376204Z", + "output_generated": "2025-12-23T09:32:54.153571512Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:54.085588087Z", + "publish_time": "2025-12-23T09:32:54.085839297Z", + "first_worker_start": "2025-12-23T09:32:54.086284515Z", + "last_worker_end": "2025-12-23T09:32:54.152403Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:54.087478163Z", + "start_time": "2025-12-23T09:32:54.087877379Z", + "end_time": "2025-12-23T09:32:54.087946282Z", + "queue_wait_time_ms": 2, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:54.08645Z", + "start_time": "2025-12-23T09:32:54.086594Z", + "end_time": "2025-12-23T09:32:54.152403Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:54.086256714Z", + "start_time": "2025-12-23T09:32:54.086380819Z", + "end_time": "2025-12-23T09:32:54.086506724Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:54.086209012Z", + "start_time": 
"2025-12-23T09:32:54.086284515Z", + "end_time": "2025-12-23T09:32:54.086332117Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 2 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4794, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/001b6c7b936d4fd765c6c970346b01154913e2a1.json b/data/output/001b6c7b936d4fd765c6c970346b01154913e2a1.json new file mode 100644 index 0000000..8836bca --- /dev/null +++ b/data/output/001b6c7b936d4fd765c6c970346b01154913e2a1.json @@ -0,0 +1,516 @@ +{ + "file_name": "001b6c7b936d4fd765c6c970346b01154913e2a1.txt", + "total_words": 1238, + "top_n_words": [ + { + "word": "the", + "count": 70 + }, + { + "word": "to", + "count": 47 + }, + { + "word": "a", + "count": 29 + }, + { + "word": "and", + "count": 28 + }, + { + "word": "that", + "count": 28 + }, + { + "word": "of", + "count": 23 + }, + { + "word": "in", + "count": 19 
+ }, + { + "word": "s", + "count": 19 + }, + { + "word": "on", + "count": 18 + }, + { + "word": "administration", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "The U.", + "length": 6 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "ambassador.", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "David Martosko .", + "length": 16 + }, + { + "text": "These are heroes.", + "length": 17 + }, + { + "text": "South Caroline Rep.", + "length": 19 + }, + { + "text": "This is so outrageous.", + "length": 22 + }, + { + "text": "20:20 EST, 1 May 2013 .", + "length": 23 + }, + { + "text": "01:55 EST, 2 May 2013 .", + "length": 23 + }, + { + "text": "' SCROLL DOWN FOR VIDEO .", + "length": 25 + }, + { + "text": "'These are great Americans.", + "length": 27 + }, + { + "text": "That law partner, former U.", + "length": 27 + }, + { + "text": "That's what this is all about.", + "length": 30 + }, + { + "text": "' she asked, referring to the .", + "length": 31 + }, + { + "text": "' South Carolina Republican Rep.", + "length": 32 + }, + { + "text": "president and his spokespersons.", + "length": 32 + }, + { + "text": "They were on the ground in Benghazi.", + "length": 36 + }, + { + "text": "ambassador and three other Americans .", + "length": 38 + }, + { + "text": "that their careers will be over' if they testify.", + "length": 49 + }, + { + "text": "personnel who survived last year's attack on the U.", + "length": 51 + }, + { + "text": "11, 2012 in a military-style attack that killed the U.", 
+ "length": 54 + }, + { + "text": "Issa's committee has already shed public light on the U.", + "length": 56 + }, + { + "text": "Ambassador Chris Stevens and three other Americans dead.", + "length": 56 + }, + { + "text": "' 'The whistle-blowers are out there,' DiGenova insisted.", + "length": 57 + }, + { + "text": "They're taking career people and making them well aware .", + "length": 57 + }, + { + "text": "President Obama denied on Tuesday that he was aware of any U.", + "length": 61 + }, + { + "text": "And they want to protect Hillary [Clinton] and the president.", + "length": 61 + }, + { + "text": "Consulate in Benghazi, Libya was attacked and burned on Sept.", + "length": 61 + }, + { + "text": "The raid on that diplomatic outpost happened eight months ago .", + "length": 63 + }, + { + "text": "'It is not going away despite the efforts of this administration.", + "length": 65 + }, + { + "text": "'She got a new Top Secret security clearance within the last year.", + "length": 66 + }, + { + "text": "'Benghazi is warming up,' Gowdy promised during a televised interview.", + "length": 70 + }, + { + "text": "\"' 'How can they possibly get up there and just lie to the press corps?", + "length": 71 + }, + { + "text": "The administration's effort to cover up whatever happened in Benghazi is going to fail.", + "length": 87 + }, + { + "text": "Consulate in Benghazi, Libya from testifying before Congress about what they experienced.", + "length": 89 + }, + { + "text": "They are all employees of the CIA and the State Department, according to a Fox News report.", + "length": 91 + }, + { + "text": "State Department's denials of requests for more robust security at the consulate in Benghazi.", + "length": 93 + }, + { + "text": "personnel who were being prevented from sharing their observations with congressional investigators.", + "length": 100 + }, + { + "text": "' 'There is going to be a Constitutional showdown here,' he predicted,' and 'Congress is going to win.", + 
"length": 102 + }, + { + "text": "'Not surprisingly, this version of events casts senior officials in the most favorable light possible.", + "length": 102 + }, + { + "text": "And now they will not clear her or any of the other lawyers to represent the Department of State people.", + "length": 104 + }, + { + "text": "would only need a letter from an administration attorney in order to learn what her client already knows.", + "length": 105 + }, + { + "text": "White House Press Secretary Jay Carney denied on Wednesday that the Obama administration was prohibiting any U.", + "length": 111 + }, + { + "text": "And in an October 2012 hearing, it produced evidence contradicting the administration's initial claim that the the Sept.", + "length": 120 + }, + { + "text": "'Next week’s hearing will expose new facts and details that the Obama Administration has tried to suppress,' Issa said.", + "length": 121 + }, + { + "text": "They want to tell their story and the administration is going to do everything it can to stop them from testifying under oath in public.", + "length": 136 + }, + { + "text": "But a lawyer representing one of those potential whistle-blowers is insisting that the White House is stonewalling congressional requests for their testimony.", + "length": 158 + }, + { + "text": "11, 2012 military-style assault on the diplomatic compound began as a 'protest' sparked by a low-budget YouTube video that lampooned the Muslim prophet Muhammad.", + "length": 161 + }, + { + "text": "At least four surviving witnesses to the Benghazi attack have retained attorneys to help them navigate the process of testifying before Congress about what they saw.", + "length": 165 + }, + { + "text": "' Joe DiGenova, shown here in a file photo, said Benghazi survivors 'want to tell their story and the [Obama] administration is going to do everything it can to stop them.", + "length": 171 + }, + { + "text": "' Both the CIA and the State Department have told Congress that no employees have 
requested the security clearances that their lawyers would need before talking to Congress.", + "length": 173 + }, + { + "text": "House Republicans issued a scathing report on April 23 criticizing the Obama administration for a series of intelligence failures and an attempted cover-up after the Benghazi raid.", + "length": 180 + }, + { + "text": "' The FBI released these photos on Wednesday of individuals who sere seen near the Benghazi consulate on the day Libyan terrorists attack it, killing four Americans including the U.", + "length": 181 + }, + { + "text": "Trey Gowdy, who serves on the House oversight committee, said Wednesday that he was 'not at liberty to disclose the identity of the witnesses' who will appear at next week's hearing.", + "length": 182 + }, + { + "text": "But Toensing, a former Justice Department official and Republican counsel to the Senate Intelligence Committee, told Fox News that administration officials are issuing 'some very despicable threats to people ...", + "length": 211 + }, + { + "text": "'I'm not familiar with this notion that anybody has been blocked from testifying,” the president said in response to a reporter's question, 'so what I’ll do is I will find out what exactly you’re referring to.", + "length": 215 + }, + { + "text": "Attorney Joe DiGenova, explained on Monday that 'the Department of State is refusing to grant clearances to Victoria and other people who want to represent the whistle-blowers, in an attempt to prevent the testimony.", + "length": 216 + }, + { + "text": "Jay Carney insisted in a press briefing that no administration employees had sought security clearances so they could testify about he Benghazi raid, even after the House Oversight Committee asked for those very clearances .", + "length": 224 + }, + { + "text": "California Republican Rep\u003e Darrell Issa (L) has announced a hearing in which the House Oversight committee, which he chairs, will hear testimony from survivors who say they have been 
blocked from telling Congress what they know.", + "length": 228 + }, + { + "text": "' Attorney Victoria Toensing represents a survivor of the Benghazi attack whom she says the Obama administration has blocked from testifying by refusing to give her the necessary security clearance to learn what her own client knows .", + "length": 234 + }, + { + "text": "When then-Secretary of State Hillary Clinton testified before Congress, she disputed that it was important to determine whether the raid was carried out by terrorist militants or by an angry group of protesters, as the administration first claimed .", + "length": 249 + }, + { + "text": "The State Department, she said during a separate Fox News interview, has 'had two letters from Chairman Issa, one on April 16, the other one April 26, that specifically say, \"We want you to provide a process for clearing a lawyer to receive classified information.", + "length": 264 + }, + { + "text": "Trey Gowdy (R) has promised that the Benghazi attack is 'not going away despite the efforts of this [Obama] administration' Attorney Victoria Toensing said Tuesday that she is representing one of them, a man who has been cowed into silence under threats of damage to his career.", + "length": 278 + }, + { + "text": "'This Administration has offered the American people only a carefully selected and sanitized version of events from before, during, and after the Benghazi terrorist attacks, committee chairman Darrell Issa, a Republican congressman from California, said in a statement Wednesday.", + "length": 279 + }, + { + "text": "And on Wednesday, Obama's chief spokesman Jay Carney insisted during a White House press briefing that 'Benghazi happened a long time ago,' and that 'we are unaware of any agency blocking an employee who would like to appear before Congress to provide information related to Benghazi.", + "length": 284 + }, + { + "text": "'On April 26, Congressman Issa sent a letter to the new Sec of State, John Kerry, demanding 
that the lawyers who are going to represent the whistle-blowers be cleared - be given clearances - so they can talk to their clients and the committee about classified information,' DiGenova said.", + "length": 288 + }, + { + "text": "The House Oversight and Government Reform Committee announced earlier in the day that it will convene a hearing on May 8 aimed at 'exposing failure' in the Obama administration to respond o security threats to that diplomatic mission, and to present to the public and to Congress an accurate version of the attack that left U.", + "length": 326 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7261962592601776 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:54.58663717Z", + "first_section_created": "2025-12-23T09:32:54.586983884Z", + "last_section_published": "2025-12-23T09:32:54.587289496Z", + "all_results_received": "2025-12-23T09:32:54.678927369Z", + "output_generated": "2025-12-23T09:32:54.679105976Z", + "total_processing_time_ms": 92, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 91, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:54.586983884Z", + "publish_time": "2025-12-23T09:32:54.587203793Z", + "first_worker_start": "2025-12-23T09:32:54.587740814Z", + "last_worker_end": "2025-12-23T09:32:54.678029Z", + "total_journey_time_ms": 91, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:54.587771616Z", + "start_time": "2025-12-23T09:32:54.587861519Z", + "end_time": "2025-12-23T09:32:54.587974524Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:54.588111Z", + "start_time": "2025-12-23T09:32:54.588269Z", + "end_time": "2025-12-23T09:32:54.678029Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 89 + }, + "topn": { + "worker_type": 
"topn", + "receive_time": "2025-12-23T09:32:54.587688312Z", + "start_time": "2025-12-23T09:32:54.587745815Z", + "end_time": "2025-12-23T09:32:54.587835418Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:54.587670111Z", + "start_time": "2025-12-23T09:32:54.587740814Z", + "end_time": "2025-12-23T09:32:54.587791616Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:54.587235994Z", + "publish_time": "2025-12-23T09:32:54.587289496Z", + "first_worker_start": "2025-12-23T09:32:54.587844718Z", + "last_worker_end": "2025-12-23T09:32:54.655125Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:54.58787302Z", + "start_time": "2025-12-23T09:32:54.587911121Z", + "end_time": "2025-12-23T09:32:54.587967023Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:54.588216Z", + "start_time": "2025-12-23T09:32:54.588347Z", + "end_time": "2025-12-23T09:32:54.655125Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:54.587819817Z", + "start_time": "2025-12-23T09:32:54.587856919Z", + "end_time": "2025-12-23T09:32:54.587920622Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:54.587798617Z", + "start_time": "2025-12-23T09:32:54.587844718Z", + "end_time": "2025-12-23T09:32:54.58787272Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 155, + "min_processing_ms": 66, + "max_processing_ms": 89, + "avg_processing_ms": 77, + "median_processing_ms": 89, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3803, + "slowest_section_id": 0, + "slowest_section_time_ms": 91 + } +} diff --git a/data/output/001b6dccba3ab0efb4296e04669bac42f5bee886.json b/data/output/001b6dccba3ab0efb4296e04669bac42f5bee886.json new file mode 100644 index 0000000..fbdbe88 --- /dev/null +++ b/data/output/001b6dccba3ab0efb4296e04669bac42f5bee886.json @@ -0,0 +1,366 @@ +{ + "file_name": "001b6dccba3ab0efb4296e04669bac42f5bee886.txt", + "total_words": 673, + "top_n_words": [ + { + "word": "the", + "count": 34 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "at", + "count": 12 + }, + { + "word": "jacob", + "count": 10 + }, + { + "word": "to", + "count": 9 + }, + { + "word": "was", + "count": 9 + }, + { + "word": "died", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Ms .", + "length": 4 + }, + { + "text": "In .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, 
+ { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Jill Reilly .", + "length": 13 + }, + { + "text": "03:35 EST, 22 October 2013 .", + "length": 28 + }, + { + "text": "02:54 EST, 22 October 2013 .", + "length": 28 + }, + { + "text": "He said that while Jacob has .", + "length": 30 + }, + { + "text": "treatment… will be successful'.", + "length": 33 + }, + { + "text": "their own personalities,' she said.", + "length": 35 + }, + { + "text": "Jacob remained in intensive care and .", + "length": 38 + }, + { + "text": "But the boys recovered at different rates.", + "length": 42 + }, + { + "text": "joined in a similar way to Joshua and Jacob.", + "length": 44 + }, + { + "text": "he'll be up and going and have a pretty normal lifespan.", + "length": 56 + }, + { + "text": "the all-clear to go home with the twin's mother Adrienne Spates.", + "length": 64 + }, + { + "text": "condition is extremely rare, only about one in every 200,000 live .", + "length": 67 + }, + { + "text": "Joshua and Jacob are one of only six such cases in Memphis history.", + "length": 67 + }, + { + "text": "Doctors had been optimistic about the twins' chances after separation.", + "length": 70 + }, + { + "text": "births is a set of conjoined twins and about 15 per cent of these are .", + "length": 71 + }, + { + "text": "saw each other for the first time: 'I'm glad that everyone gets to see .", + "length": 72 + }, + { + "text": "Spates, a single mother who has two other children, cried when her sons .", + "length": 73 + }, + { + "text": "my baby and view them as separate people because they are and they have .", + "length": 73 + }, + { + "text": "needed few more operations, while Joshua recovered quicker and was given .", + "length": 74 + }, + { + "text": "more serious heart problems 'our cardiology team has very high hopes his .", + "length": 74 + }, + { + "text": "Jacob and Joshua pictured after their gruelling 13-hour operation in 2011 
.", + "length": 75 + }, + { + "text": "Fewer than a third of conjoined twins survive more than one day after birth.", + "length": 76 + }, + { + "text": "It was unclear whether the children were still in DCS custody when Jacob died.", + "length": 78 + }, + { + "text": "Fused: A 3D reconstruction shows how the boys' vertebrae were joined at the base .", + "length": 82 + }, + { + "text": "Doctors at the hospital said practice was the key to separating the twins successfully.", + "length": 87 + }, + { + "text": "They were attached at the pelvis and lower spine, but had separate hearts, heads and limbs.", + "length": 91 + }, + { + "text": "Just two dozen conjoined twins have ever been successfully separated anywhere in the world.", + "length": 91 + }, + { + "text": "Joined: This MRI scan provided by the hospital shows just how the pair were fused together .", + "length": 92 + }, + { + "text": "Relief: Joshua and Jocob's mother Adrienne was overwhelmed that all went to plan at hospital .", + "length": 94 + }, + { + "text": "A'zhari Jones died just days after celebrating her first birthday with her twin sister A'zhiah.", + "length": 95 + }, + { + "text": "A'zhari Jones died just days after celebrating her first birthday with her twin sister A'zhiah .", + "length": 96 + }, + { + "text": "2011 Dr Max Langham, one of the hospital's surgeons told TODAY: 'Joshua's doing great, and hopefully .", + "length": 102 + }, + { + "text": "A formerly conjoined twin who was separated from his brother at just eight-weeks-old has died aged two.", + "length": 103 + }, + { + "text": "A formerly conjoined twin who was separated from his brother at just eight-weeks-old has died aged two.", + "length": 103 + }, + { + "text": "Last week a formerly conjoined twin who was surgically separated from her sister in April died in Virginia.", + "length": 107 + }, + { + "text": "Last week a formerly conjoined twin who was surgically separated from her sister in April died in Virginia.", + "length": 107 + 
}, + { + "text": "It is as yet unknown whether the death of A'zhari Jones was caused by complications from the separation surgery.", + "length": 112 + }, + { + "text": "'If they had not been separated, sometime in the next year or two, they probably would have passed,' Dr Langham said.", + "length": 117 + }, + { + "text": "At eight-weeks-old they had to be separated because of the severity of Jacob's heart condition and endured a 13-hour operation.", + "length": 127 + }, + { + "text": "In April 2012 the twins were placed in the custody of the Department of Child Services following the DUI arrest of their mother.", + "length": 128 + }, + { + "text": "Jacob Spates died last week according to Le Bonheur Children’s Hospital in Memphis, although the cause of his death is unclear.", + "length": 129 + }, + { + "text": "Battling: A team of surgeons at Le Bonheur Children's Hospital in Memphis worked for 13 hours to separate the babies and keep them alive .", + "length": 139 + }, + { + "text": "The moment Jacob and Joshua see each other for the first time at Le Bonheur Children's Hospital in Memphis, where they were born two years ago .", + "length": 144 + }, + { + "text": "This had included the anaesthesia team sewing together two Cabbage Patch dolls to practise flipping them without tangling the various lines that would be attached during surgery.", + "length": 178 + }, + { + "text": "Jacob and his brother Joshua were joined at the rear of the pelvis and gastrointestinal tract when they were born January 24, 2011 via cesarean section at the Regional Medical Center.", + "length": 183 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5083989500999451 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:55.088037867Z", + "first_section_created": "2025-12-23T09:32:55.088317278Z", + "last_section_published": "2025-12-23T09:32:55.088594389Z", + "all_results_received": "2025-12-23T09:32:55.160567674Z", + "output_generated": 
"2025-12-23T09:32:55.160732381Z", + "total_processing_time_ms": 72, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 71, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:55.088317278Z", + "publish_time": "2025-12-23T09:32:55.088594389Z", + "first_worker_start": "2025-12-23T09:32:55.089049108Z", + "last_worker_end": "2025-12-23T09:32:55.159676Z", + "total_journey_time_ms": 71, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:55.089001806Z", + "start_time": "2025-12-23T09:32:55.089073409Z", + "end_time": "2025-12-23T09:32:55.089160712Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:55.089267Z", + "start_time": "2025-12-23T09:32:55.089405Z", + "end_time": "2025-12-23T09:32:55.159676Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 70 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:55.088969504Z", + "start_time": "2025-12-23T09:32:55.089049108Z", + "end_time": "2025-12-23T09:32:55.089128111Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:55.089088609Z", + "start_time": "2025-12-23T09:32:55.089178113Z", + "end_time": "2025-12-23T09:32:55.089212714Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 70, + "min_processing_ms": 70, + "max_processing_ms": 70, + "avg_processing_ms": 70, + "median_processing_ms": 
70, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3881, + "slowest_section_id": 0, + "slowest_section_time_ms": 71 + } +} diff --git a/data/output/001b8ecda7b31fff8ab04a99a0455336477f09a1.json b/data/output/001b8ecda7b31fff8ab04a99a0455336477f09a1.json new file mode 100644 index 0000000..8259727 --- /dev/null +++ b/data/output/001b8ecda7b31fff8ab04a99a0455336477f09a1.json @@ -0,0 +1,278 @@ +{ + "file_name": "001b8ecda7b31fff8ab04a99a0455336477f09a1.txt", + "total_words": 632, + "top_n_words": [ + { + "word": "a", + "count": 21 + }, + { + "word": "the", + "count": 21 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "her", + "count": 16 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "she", + "count": 13 + }, + { + "word": "hawke", + "count": 11 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "was", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "We are in mourning.", + "length": 19 + }, + { + "text": "We need to be with family now.", + "length": 30 + }, + { + "text": "I am going to let justice do its job.", + "length": 37 + }, + { + "text": "The legal driving limit in the UK is 80milligrams.", + "length": 50 + }, + { + "text": "Pictures show her beaming during her recent travels through France.", + "length": 67 + }, + { + "text": "There are grey areas here, and the investigation will make them clearer.", + 
"length": 72 + }, + { + "text": "Miss Hawke was starved of oxygen, had a heart attack and went into a coma.", + "length": 74 + }, + { + "text": "I am looking after my family, my son, that is what is important to me now.", + "length": 74 + }, + { + "text": "In one, she smiles widely in front of a setting sun with a fan cooling her face.", + "length": 80 + }, + { + "text": "She was taken to the nearby Centre Hospitalier in Pau, where she died on September 30.", + "length": 86 + }, + { + "text": "Beaming: The mother-to-be regularly updated friends on her pregnancy and was ecstatic about having a baby .", + "length": 107 + }, + { + "text": "Glowing in the evening sunshine, expectant mother Xynthia Hawke looks ecstatic to be on the verge of motherhood.", + "length": 112 + }, + { + "text": "Anaesthetist Helga Wauters, 45, allegedly inserted a tube into Miss Hawke’s oesophagus instead of her windpipe.", + "length": 113 + }, + { + "text": "Botched care: Belgian Helga Wauters, 45, has been charged with aggravated manslaughter and faces up to five years in prison .", + "length": 125 + }, + { + "text": "Miss Hawke, who worked as a recruiter for businesses looking for multi-lingual employees, updated friends on her pregnancy using social media.", + "length": 142 + }, + { + "text": "Wauters, who is Belgian, was working despite difficulties with ‘expression, comprehension and reactivity’, according to a local prosecutor.", + "length": 143 + }, + { + "text": "Happy couple:  Ms Hawke and Mr Balthazar pose for a selfie (left) and the pregnant mother-to-be shops in a market just weeks before her death .", + "length": 145 + }, + { + "text": "After their healthy baby boy was delivered at a private maternity clinic in Orthez on September 26, a resuscitation procedure went horribly wrong.", + "length": 146 + }, + { + "text": "But days later, the 28-year-old Briton was killed by a doctor, who was reported to have been drunk and to have botched her care during childbirth.", + "length": 
146 + }, + { + "text": "The doctor, who has admitted a ‘pathological problem with alcohol’, now faces five years behind bars after being charged with aggravated manslaughter.", + "length": 154 + }, + { + "text": "Fighting back tears outside the home where they settled a few months ago, her French partner Yannick Balthazar, 33, said: ‘It is a very difficult time for us.", + "length": 160 + }, + { + "text": "’ Miss Hawke required a caesarean section after going into labour last month in Ustaritz, the village in the French Pyrenees where she had moved with Mr Balthazar.", + "length": 165 + }, + { + "text": "Staff said she was slurring her words to the extent that she ‘could not be understood and she did not seem to understand what people were saying’, a source added.", + "length": 166 + }, + { + "text": "Miss Hawke grew up in North Petherton, Somerset, where she excelled at her comprehensive, Haygrove School, and won a prize for outstanding achievement in GCSE French.", + "length": 166 + }, + { + "text": "When questioned on the day Miss Hawke died, Wauters was found to have 216milligrams of alcohol per 100 millilitres of blood – the equivalent of four bottles of wine.", + "length": 167 + }, + { + "text": "Miss Hawke grew up in North Petherton, Somerset, where she excelled at her comprehensive, Haygrove School, and won a prize for outstanding achievement in GCSE French .", + "length": 167 + }, + { + "text": "She holidayed in France with her parents Fraser and Clare, now 61 and 56, and her older sister Iris, 30, before moving to Paris to study at the University of London Institute.", + "length": 175 + }, + { + "text": "Devastated: Yannick Balthazar must now raise his son alone after his partner, Xynthia Hawke, was killed by in a botched caesarean section by an anaesthetist who is said to have been drunk at the time .", + "length": 204 + }, + { + "text": "Wauters’s lawyer, Florence Hegoburu, warned against any ‘hasty conclusions’, adding: ‘My client will assume her 
responsibilities in relation to the facts that she recognises, but she is not solely responsible.", + "length": 217 + }, + { + "text": "Miss Hawke’s devastated parents are believed to have flown to the South of France – where she had been living an idyllic life with her partner – to help him care for the baby boy, who survived and has been named Isaac.", + "length": 224 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4578551650047302 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:55.589392362Z", + "first_section_created": "2025-12-23T09:32:55.589768677Z", + "last_section_published": "2025-12-23T09:32:55.589993686Z", + "all_results_received": "2025-12-23T09:32:55.654972691Z", + "output_generated": "2025-12-23T09:32:55.655125697Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:55.589768677Z", + "publish_time": "2025-12-23T09:32:55.589993686Z", + "first_worker_start": "2025-12-23T09:32:55.590416303Z", + "last_worker_end": "2025-12-23T09:32:55.654084Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:55.590569509Z", + "start_time": "2025-12-23T09:32:55.590647512Z", + "end_time": "2025-12-23T09:32:55.590740816Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:55.590759Z", + "start_time": "2025-12-23T09:32:55.590905Z", + "end_time": "2025-12-23T09:32:55.654084Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:55.590404403Z", + "start_time": "2025-12-23T09:32:55.590476106Z", + "end_time": "2025-12-23T09:32:55.590603311Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 
0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:55.590357201Z", + "start_time": "2025-12-23T09:32:55.590416303Z", + "end_time": "2025-12-23T09:32:55.590512707Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3764, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/001be24b2db1c04f62386f98997fee725c5fd2fb.json b/data/output/001be24b2db1c04f62386f98997fee725c5fd2fb.json new file mode 100644 index 0000000..1d4b659 --- /dev/null +++ b/data/output/001be24b2db1c04f62386f98997fee725c5fd2fb.json @@ -0,0 +1,334 @@ +{ + "file_name": "001be24b2db1c04f62386f98997fee725c5fd2fb.txt", + "total_words": 706, + "top_n_words": [ + { + "word": "the", + "count": 53 + }, + { + "word": "in", + "count": 21 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "and", + "count": 
15 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "is", + "count": 13 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "court", + "count": 11 + }, + { + "word": "marriage", + "count": 11 + }, + { + "word": "federal", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "Constitution.", + "length": 13 + }, + { + "text": "Hollingsworth v.", + "length": 16 + }, + { + "text": "United States v.", + "length": 16 + }, + { + "text": "So per its rules, the U.", + "length": 24 + }, + { + "text": "Federalism is a two-way street.", + "length": 31 + }, + { + "text": "There's a second issue in Windsor.", + "length": 34 + }, + { + "text": "DOMA passed in 1996 with 78% of the U.", + "length": 38 + }, + { + "text": "Only the states determine who can get married.", + "length": 46 + }, + { + "text": "The whole nation is focused on the litigation.", + "length": 46 + }, + { + "text": "Windsor would alter America's system of federalism.", + "length": 51 + }, + { + "text": "In Windsor, the defendant was the federal government.", + "length": 53 + }, + { + "text": "Justice Anthony Kennedy is likely the swing vote in both.", + "length": 57 + }, + { + "text": "If Windsor is historic, Hollingsworth is earth-shattering.", + "length": 58 + }, + { + "text": "Thus it's possible she lacks standing to sue over the issue.", + "length": 60 + }, + { + "text": "This week the Supreme Court heard two historic cases on marriage.", + "length": 65 + }, + { + "text": "One requirement is that there must be adversity between the parties.", + "length": 68 + }, + { + "text": "House and 85% of the Senate and was signed by President Bill Clinton.", + "length": 69 + }, + { + "text": "Assuming the court does decide the merits, the implications are historic.", + "length": 73 + }, + { + "text": "Ironically, it's possible that neither case will be decided on the merits.", + 
"length": 74 + }, + { + "text": "Article III of the Constitution limits the jurisdiction of the federal courts.", + "length": 78 + }, + { + "text": "Edith Windsor entered into a gay marriage in Canada in 2007 and lived in New York.", + "length": 82 + }, + { + "text": "The swing vote regarding the Article III issues in both cases is probably Chief Justice John Roberts.", + "length": 101 + }, + { + "text": "Perry is about whether state laws defining marriage as one man and one woman violate the 14th Amendment of the U.", + "length": 113 + }, + { + "text": "But if DOMA Section 3 is invalidated, the states will be able to dictate whom the recipients of federal benefits are.", + "length": 117 + }, + { + "text": "When her partner died in 2009, Windsor sued to contest the federal estate tax she paid, claiming a spousal exemption.", + "length": 117 + }, + { + "text": "But the outcomes range from nothing at all to fundamentally restructuring the foundational unit of western civilization.", + "length": 120 + }, + { + "text": "House voted to authorize Paul Clement — probably the greatest Supreme Court lawyer practicing today — to defend DOMA.", + "length": 122 + }, + { + "text": "As Justice Samuel Alito said this week, the Internet and cell phones have been around on this planet longer than gay marriage.", + "length": 126 + }, + { + "text": "It also raises the issue of whether courts must recognize polygamous marriages, which are legal in dozens of nations worldwide.", + "length": 127 + }, + { + "text": "Even though I was a lawyer in the litigation and in the courthouse both days, I can't predict which way the court will come down.", + "length": 129 + }, + { + "text": "But President Barack Obama declared that he believes DOMA is unconstitutional and ordered his Justice Department not to defend it.", + "length": 130 + }, + { + "text": "In Hollingsworth, California's governor and attorney general abdicated their duties by refusing to defend their state constitution.", + 
"length": 131 + }, + { + "text": "On the merits, the court seems unlikely to declare an unwritten constitutional right to gay marriage, though arguments did not go as well for DOMA.", + "length": 147 + }, + { + "text": "But the federal government is free to decide whom to confer federal benefits on — largely economic entitlements and federal issues such as immigration.", + "length": 153 + }, + { + "text": "He openly expressed skepticism in Hollingsworth and led the Court in an hour-long debate in Windsor solely focused on whether the court has jurisdiction.", + "length": 153 + }, + { + "text": "Windsor asks whether Section 3 of the Defense of Marriage Act, which defines marriage for federal law and programs as between one man and one woman, is unconstitutional.", + "length": 169 + }, + { + "text": "Since the defendants in both cases refused to defend their own laws, the court will consider whether the Constitution allows these third-party legal teams to become a proper party to the lawsuits.", + "length": 196 + }, + { + "text": "So pursuant to California law, the sponsors of Prop 8 — officially registered with the state — stepped in to defend the law, represented by Charles Cooper at Cooper Kirk and the Alliance Defending Freedom.", + "length": 209 + }, + { + "text": "It is an energetic debate in all 50 states, and this summer we will learn whether the Supreme Court will shut down this debate by making it a constitutional issue on which the American people are not allowed to vote.", + "length": 216 + }, + { + "text": "But New York did not create gay marriage until 2011, so Windsor was not harmed by DOMA not allowing the federal government to recognize her marriage, since if the IRS used state definitions Windsor would still be regarded an a single woman.", + "length": 240 + }, + { + "text": "If the Supreme Court declares a constitutional right to marriage other than one-man, one-woman, then all traditional marriage laws in all 50 states will be invalid, and there 
will be a serious debate (already in a lower federal court) of whether polygamists also have a constitutional right to national recognition.", + "length": 315 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4874154031276703 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:56.090916764Z", + "first_section_created": "2025-12-23T09:32:56.092212116Z", + "last_section_published": "2025-12-23T09:32:56.092504728Z", + "all_results_received": "2025-12-23T09:32:56.154711321Z", + "output_generated": "2025-12-23T09:32:56.154889128Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:56.092212116Z", + "publish_time": "2025-12-23T09:32:56.092504728Z", + "first_worker_start": "2025-12-23T09:32:56.092872342Z", + "last_worker_end": "2025-12-23T09:32:56.153754Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:56.092908644Z", + "start_time": "2025-12-23T09:32:56.09306345Z", + "end_time": "2025-12-23T09:32:56.093150354Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:56.093153Z", + "start_time": "2025-12-23T09:32:56.093298Z", + "end_time": "2025-12-23T09:32:56.153754Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:56.092885543Z", + "start_time": "2025-12-23T09:32:56.092969946Z", + "end_time": "2025-12-23T09:32:56.093106152Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:56.09280204Z", + "start_time": "2025-12-23T09:32:56.092872342Z", + "end_time": "2025-12-23T09:32:56.092925145Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4229, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/001c2d7a6e17fea9bf7ae18ced63647ab26b510e.json b/data/output/001c2d7a6e17fea9bf7ae18ced63647ab26b510e.json new file mode 100644 index 0000000..1684c1a --- /dev/null +++ b/data/output/001c2d7a6e17fea9bf7ae18ced63647ab26b510e.json @@ -0,0 +1,274 @@ +{ + "file_name": "001c2d7a6e17fea9bf7ae18ced63647ab26b510e.txt", + "total_words": 614, + "top_n_words": [ + { + "word": "the", + "count": 31 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "a", + "count": 18 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "he", + "count": 14 + }, + { + "word": "on", + "count": 14 + }, + { + "word": "his", + "count": 12 + }, + { + "word": "i", + "count": 11 + }, + { + "word": "in", + "count": 11 + }, + { 
+ "word": "but", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "'But it has obviously not been happening.", + "length": 41 + }, + { + "text": "15am on March 19 when he hit the pothole.", + "length": 41 + }, + { + "text": "' He was on his way to work at Boots in Hereford at 5.", + "length": 54 + }, + { + "text": "Once they are there and reported, they should be filled in.", + "length": 59 + }, + { + "text": "'The main roads are okay, but the side roads are appalling.", + "length": 59 + }, + { + "text": "He added: 'I had my lights on but I didn’t see the pothole.", + "length": 61 + }, + { + "text": "Tidying up the roads must be one of Herefordshire Council’s top priorities.", + "length": 77 + }, + { + "text": "'I’ve been cycling for 20 years and on the side roads it has definitely got worse.", + "length": 84 + }, + { + "text": "'I didn’t quite go over the handlebars but went down and made sure my body took the impact.", + "length": 93 + }, + { + "text": "'I landed on my elbow, which ended up broken and also had lots of grazes on the side of my face.", + "length": 96 + }, + { + "text": "'I cycle every day to work at Boots and normally I go straight down and stick to the A and B roads.", + "length": 99 + }, + { + "text": "'I shattered a bone in my elbow which required a plate and screws and was operated on the next day.", + "length": 99 + }, + { + "text": "He was taken to hospital by his brother where doctors discovered he had shattered a bone in his elbow.", + "length": 102 + }, + { + "text": "'People pay their council taxes and all you ask is they collect the bins on time and keep the roads tidy.", + "length": 105 + }, + { + "text": "He was taken to hospital and had the operation on his elbow the next day before being sent home on March 21.", + "length": 108 + }, + { + "text": "Shocking pictures taken at Hereford County Hospital’s A\u0026E show Mr Gummery's face caked in blood following the painful fall.", + "length": 125 + }, + { + "text": "Horrific: 
Richard Gummery at hospital after he broke his arm and gashed his face after crashing into a pothole on his bicycle.", + "length": 126 + }, + { + "text": "He said: 'We desperately need to act to fix the potholes on our roads to make the county safe again for cyclists and motorists.", + "length": 127 + }, + { + "text": "He needed an emergency three-hour operation and spent two nights in hospital before being sent home in a plaster cast to recover.", + "length": 129 + }, + { + "text": "He needed an emergency three-hour operation and spent two nights in hospital before being sent home in a plaster cast to recover .", + "length": 130 + }, + { + "text": "Today, Mr Gummery, who lives in Much Dewchurch, Herefordshire, blasted his local council for not doing more to keep the roads safe.", + "length": 131 + }, + { + "text": "He said: 'You often have people like me who work early hours and when you cannot see the potholes it becomes particularly difficult.", + "length": 132 + }, + { + "text": "Richard Gummery, 45, was pedalling downhill at 30mph but didn’t spot the massive crater - which was 18 inches long and 5inches deep.", + "length": 134 + }, + { + "text": "'It’s not too painful now but there’s a huge cast on it and it’s restrictive but hopefully I’ll be more mobile in a few weeks time.", + "length": 139 + }, + { + "text": "'But I decided to go down Knocker Hill Lane which is a country road and was going downhill so probably going at 30mph when I hit the pothole.", + "length": 141 + }, + { + "text": "The father-of-one - who cycles 70 miles a week - was catapulted off his bike and suffered a broken elbow as well as multiple cuts to his face and head.", + "length": 151 + }, + { + "text": "This horrific picture shows the appalling injuries suffered by a cyclist when he was thrown from his bike on his way to work after hitting a giant pothole.", + "length": 155 + }, + { + "text": "Mr Gummery said he had cycled the seven-and-a-half mile journey five days a week for the last 
20 years but this is the first time he has had a serious accident.", + "length": 160 + }, + { + "text": "' Transport secretary Patrick McLoughlin confirmed this week that Herefordshire Council will get £3,538,803 for road repairs following the wet winter and North Herefordshire’s Tory MP Bill Wiggin has urged the council to use the money to make the roads safe.", + "length": 261 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8947777152061462 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:56.593352102Z", + "first_section_created": "2025-12-23T09:32:56.593622313Z", + "last_section_published": "2025-12-23T09:32:56.593830522Z", + "all_results_received": "2025-12-23T09:32:56.661594838Z", + "output_generated": "2025-12-23T09:32:56.661796246Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:56.593622313Z", + "publish_time": "2025-12-23T09:32:56.593830522Z", + "first_worker_start": "2025-12-23T09:32:56.594306541Z", + "last_worker_end": "2025-12-23T09:32:56.660603Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:56.59430104Z", + "start_time": "2025-12-23T09:32:56.594359443Z", + "end_time": "2025-12-23T09:32:56.594416045Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:56.594563Z", + "start_time": "2025-12-23T09:32:56.594721Z", + "end_time": "2025-12-23T09:32:56.660603Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:56.594252139Z", + "start_time": "2025-12-23T09:32:56.594324741Z", + "end_time": "2025-12-23T09:32:56.594408745Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:56.594240338Z", + "start_time": "2025-12-23T09:32:56.594306541Z", + "end_time": "2025-12-23T09:32:56.594347642Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3244, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/001c839e1d76c400129f6c2799957c74e9895815.json b/data/output/001c839e1d76c400129f6c2799957c74e9895815.json new file mode 100644 index 0000000..4f92fcf --- /dev/null +++ b/data/output/001c839e1d76c400129f6c2799957c74e9895815.json @@ -0,0 +1,448 @@ +{ + "file_name": "001c839e1d76c400129f6c2799957c74e9895815.txt", + "total_words": 1016, + "top_n_words": [ + { + "word": "the", + "count": 72 + }, + { + "word": "of", + "count": 32 + }, + { + "word": "to", + "count": 29 + }, + { + 
"word": "ebola", + "count": 23 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "in", + "count": 19 + }, + { + "word": "is", + "count": 19 + }, + { + "word": "they", + "count": 17 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "are", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "\"They died.", + "length": 11 + }, + { + "text": "Safe burials .", + "length": 14 + }, + { + "text": "Town abandoned .", + "length": 16 + }, + { + "text": "Quarantine zone .", + "length": 17 + }, + { + "text": "To get it to zero.", + "length": 18 + }, + { + "text": "Only one survived.", + "length": 18 + }, + { + "text": "Ebola nightmares .", + "length": 18 + }, + { + "text": "We have to fight it.", + "length": 20 + }, + { + "text": "They died,\" he says.", + "length": 20 + }, + { + "text": "\"It is very heartbreaking.", + "length": 26 + }, + { + "text": "They are all gone,\" says Johnson.", + "length": 33 + }, + { + "text": "People preferred driving us away.", + "length": 33 + }, + { + "text": "\" \"We need food, we really need medicine.", + "length": 41 + }, + { + "text": "Now they are disclosing death to us,\" he explains.", + "length": 50 + }, + { + "text": "They suit up: from head to toe, no skin is exposed.", + "length": 51 + }, + { + "text": "READ MORE: Ebola: Your biggest questions answered .", + "length": 51 + }, + { + "text": "\"Because we do farm here and now there's been no farming.", + "length": 57 + }, + { + "text": "READ MORE: Ebola: Nine things to know about the disease .", + "length": 57 + }, + { + "text": "\"It was kind of difficult for communities to disclose death.", + "length": 60 + }, + { + "text": "\" READ MORE: Ebola death toll passes 1,550, outbreak worsens .", + "length": 62 + }, + { + "text": "\"We must do everything we can to kick Ebola out of our country.", + "length": 63 + }, + { + "text": "The toll of the isolation is weighing heavily on the community.", + "length": 63 + }, + { + "text": "No ceremony, no mourning, no family 
members, and no final goodbyes.", + "length": 67 + }, + { + "text": "You are working for the team at the front and you see them lying down.", + "length": 70 + }, + { + "text": "And not even the health workers are spared the ravaging effects of Ebola.", + "length": 73 + }, + { + "text": "\"So many people die -- the houses on your right and even the houses on your left.", + "length": 81 + }, + { + "text": "This is what life is like across Lofa: The people are locked in, afraid and alone.", + "length": 82 + }, + { + "text": "\"Several times I dream I become infected, I see myself in the case management center.", + "length": 85 + }, + { + "text": "\"Staying at home or running away from Ebola is not a solution, so we have to face it.", + "length": 85 + }, + { + "text": "\" \"We must be grateful for the communities, through the efforts of the local leaders.", + "length": 85 + }, + { + "text": "Today, the team has been called to a village where a woman has died of unknown causes.", + "length": 86 + }, + { + "text": "The Lofa County health team arrives carrying gloves, gowns, goggles and diluted bleach.", + "length": 87 + }, + { + "text": "It's hard to imagine another area in Lofa county that has been harder hit than this one.", + "length": 88 + }, + { + "text": "\"It is difficult to stand in front of Ebola, but this is the situation we have,\" he says.", + "length": 89 + }, + { + "text": "It may not have been from the deadly virus, but the villagers are not taking any chances.", + "length": 89 + }, + { + "text": "Wailing rents the air as the burial team walks out of the house carrying the body on a stretcher.", + "length": 97 + }, + { + "text": "On their hands they wear three layers of gloves, securing the edges with clear tape at the wrists.", + "length": 98 + }, + { + "text": "Before they enter the house to collect the body, one of them goes in and sprays the house with bleach.", + "length": 102 + }, + { + "text": "\"When it started, it wasn't that easy,\" says Alpha 
Tamba, an Ebola response coordinator in Lofa County.", + "length": 103 + }, + { + "text": "Home to more than 8,000 people, Barkedu is now under quarantine: no one can go in, and no one can go out.", + "length": 105 + }, + { + "text": "Day by day, they are dying,\" says Tamba, who admits the harrowing work he does has caused him nightmares.", + "length": 105 + }, + { + "text": "\"Sometimes we go to bed and we dream of nothing else but Ebola, Ebola, Ebola -- nothing else,\" he explains.", + "length": 107 + }, + { + "text": "Johnson says he lost his 8-months-pregnant sister, his brother, niece and many, many others: too many to name.", + "length": 110 + }, + { + "text": "One of the local clinics had to be locked up after all the healthcare workers based there contracted the virus.", + "length": 111 + }, + { + "text": "But the most important one is medicine because the hospital is closed down, there is no health worker,\" he says.", + "length": 112 + }, + { + "text": "\"From the time we started receiving death from Ebola -- every activity cease,\" says Musa Sessay, the town's chief.", + "length": 114 + }, + { + "text": "Some of those crying are the dead woman's family members; for their own safety, they can only mourn from a distance.", + "length": 116 + }, + { + "text": "Sometimes when they are called in to investigate a case, they get there only to discover the victim is one of their own.", + "length": 120 + }, + { + "text": "Then -- and only then -- can the rest of the team enter to place the body in an airtight polythene bag, ready for burial.", + "length": 121 + }, + { + "text": "positive for the virus but -- because they reported it early and because of the medical teams' efforts -- later recovered.", + "length": 122 + }, + { + "text": "Zango Town, Liberia (CNN) -- At the gravesite in a northern Liberia village, there are no religious or traditional burial rites.", + "length": 128 + }, + { + "text": "Some of the residents abandoned the town in such a hurry that 
their clothes and floor mats have been left hanging on clotheslines.", + "length": 130 + }, + { + "text": "But then there's Barkedu Town -- of the 1,000 or so Ebola-related deaths in Liberia, 20% of the victims have died in this single town.", + "length": 134 + }, + { + "text": "These men are part of the country's Ebola response team, specifically tasked with burying anyone suspected to have died of the Ebola virus.", + "length": 139 + }, + { + "text": "Now almost anytime there is a suspected Ebola death in the community, they call in the Ebola response team to come and bury the body safely.", + "length": 140 + }, + { + "text": "A few kilometres away from the village is Zango Town: most of the houses here have been abandoned, their doors padlocked and windows shuttered.", + "length": 143 + }, + { + "text": "These positive outcomes keep Tamba hopeful as he and other health workers continue to tirelessly explain to the community how to prevent infection.", + "length": 147 + }, + { + "text": "The virus is spread through contact with the blood and body fluids of people infected with Ebola, and it is still transferable even from a dead body.", + "length": 149 + }, + { + "text": "Kazalee Johnson, a community worker, tells CNN the empty houses belong to people who either died of Ebola or those who fled in terror, for fear of contracting the virus.", + "length": 169 + }, + { + "text": "To help combat the spread of the disease, the Liberian government has directed that its citizens should not bury anyone who dies of, or is suspected of having been infected with, Ebola.", + "length": 185 + }, + { + "text": "Nothing but a group of men dressed in space-suit-like outfits, cautiously throwing the dead body into the grave, they pause only to toss in anything else they are wearing that came into contact with the deceased.", + "length": 212 + }, + { + "text": "For months Liberians ignored the directive, fearing that they would be ostracized by their communities if they admitted 
that their relatives had died of Ebola, but here in Lofa County -- ground zero of the country's outbreak -- almost everyone has witnessed the devastating suffering and numerous deaths caused by the virus.", + "length": 324 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.622543603181839 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:57.094612794Z", + "first_section_created": "2025-12-23T09:32:57.094996909Z", + "last_section_published": "2025-12-23T09:32:57.095339623Z", + "all_results_received": "2025-12-23T09:32:57.236616285Z", + "output_generated": "2025-12-23T09:32:57.236834494Z", + "total_processing_time_ms": 142, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 141, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:57.094996909Z", + "publish_time": "2025-12-23T09:32:57.095253619Z", + "first_worker_start": "2025-12-23T09:32:57.095846943Z", + "last_worker_end": "2025-12-23T09:32:57.22888Z", + "total_journey_time_ms": 133, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:57.095856644Z", + "start_time": "2025-12-23T09:32:57.095922246Z", + "end_time": "2025-12-23T09:32:57.09600475Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:57.096056Z", + "start_time": "2025-12-23T09:32:57.096208Z", + "end_time": "2025-12-23T09:32:57.22888Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 132 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:57.09576574Z", + "start_time": "2025-12-23T09:32:57.095846943Z", + "end_time": "2025-12-23T09:32:57.095965348Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:57.095820042Z", + "start_time": 
"2025-12-23T09:32:57.095895845Z", + "end_time": "2025-12-23T09:32:57.095973548Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:32:57.095293021Z", + "publish_time": "2025-12-23T09:32:57.095339623Z", + "first_worker_start": "2025-12-23T09:32:57.095902345Z", + "last_worker_end": "2025-12-23T09:32:57.235637Z", + "total_journey_time_ms": 140, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:57.095910946Z", + "start_time": "2025-12-23T09:32:57.09600585Z", + "end_time": "2025-12-23T09:32:57.096073752Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:57.096089Z", + "start_time": "2025-12-23T09:32:57.096208Z", + "end_time": "2025-12-23T09:32:57.235637Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 139 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:57.095902145Z", + "start_time": "2025-12-23T09:32:57.095935647Z", + "end_time": "2025-12-23T09:32:57.095955648Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:57.095864944Z", + "start_time": "2025-12-23T09:32:57.095902345Z", + "end_time": "2025-12-23T09:32:57.095910846Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 271, + "min_processing_ms": 132, + "max_processing_ms": 139, + "avg_processing_ms": 135, + "median_processing_ms": 139, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2859, + "slowest_section_id": 1, + "slowest_section_time_ms": 140 + } +} diff --git a/data/output/001c9b4f4f1f431b85bae0abaadf4e7666064f35.json b/data/output/001c9b4f4f1f431b85bae0abaadf4e7666064f35.json new file mode 100644 index 0000000..2282dd1 --- /dev/null +++ b/data/output/001c9b4f4f1f431b85bae0abaadf4e7666064f35.json @@ -0,0 +1,298 @@ +{ + "file_name": "001c9b4f4f1f431b85bae0abaadf4e7666064f35.txt", + "total_words": 641, + "top_n_words": [ + { + "word": "to", + "count": 26 + }, + { + "word": "her", + "count": 25 + }, + { + "word": "the", + "count": 19 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "she", + "count": 16 + }, + { + "word": "staniforth", + "count": 13 + }, + { + "word": "i", + "count": 12 + }, + { + "word": "said", + "count": 11 + }, + { + "word": "herself", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "\".", + "length": 2 + }, + { + "text": "org.", + "length": 4 + }, + { + "text": "samaritans.", + "length": 11 + }, + { + "text": "'I met Dianne coming up the stairs.", + "length": 35 + }, + { + "text": "I’m done now, I don’t want to live.", + "length": 39 + }, + { + "text": "I told her they were okay and not to worry.", + "length": 43 + }, + { + "text": "Her husband suffered serious burns to his hands.", + "length": 48 + }, + { + "text": "I jumped out of bed and heard the screaming again.", + 
"length": 50 + }, + { + "text": "'Later she beckoned me over and asked \"where are my kids?", + "length": 57 + }, + { + "text": "She was on fire, the hottest, brightest thing I’ve ever seen.", + "length": 63 + }, + { + "text": "She added: 'I don’t think she would have wanted to kill herself.", + "length": 66 + }, + { + "text": "' Mrs Baird urged her sister to leave her husband and build a new life.", + "length": 71 + }, + { + "text": "'If she got out she would go and get more wine or sit by the railway lines.", + "length": 75 + }, + { + "text": "She loved her children and I don’t think she would have wanted to leave them.", + "length": 79 + }, + { + "text": "' Mr Staniforth, a business consultant, said he wanted to separate from his wife.", + "length": 81 + }, + { + "text": "He said: 'I’d remove tablets and wine and lock the doors to stop Dianne going out.", + "length": 84 + }, + { + "text": "He told the inquest in Chesterfield, Derbyshire: 'I heard a scream like I’d never heard before.", + "length": 97 + }, + { + "text": "Pc Michelle Witham told the coroner: 'On arrival at hospital Mrs Staniforth was taken for resuscitation.", + "length": 104 + }, + { + "text": "An inquest in Chesterfield, Derbyshire, heard she then decided to 'teach him a lesson' and set herself on fire.", + "length": 111 + }, + { + "text": "' For confidential support call the Samaritans on 08457 90 90 90, visit a local Samaritans branch or go to www.", + "length": 111 + }, + { + "text": "The coroner was then told she set fire to herself while her husband and their two children were upstairs in bed.", + "length": 112 + }, + { + "text": "Dianne Staniforth, 51, discovered her husband Paul had been unfaithful after seeing a text message on his phone.", + "length": 112 + }, + { + "text": "Dianne Staniforth, 51, discovered her husband Paul had been unfaithful after seeing a text message on his phone .", + "length": 113 + }, + { + "text": "Mr Staniforth said in the six months before her death his wife 
had threatened to kill herself between 30 and 40 times.", + "length": 118 + }, + { + "text": "Mr Staniforth said in the six months before her death his wife had threatened to kill herself between 30 and 40 times .", + "length": 119 + }, + { + "text": "' Mrs Staniforth’s sister Julie Baird said on April 22, Dianne sent her a text saying: 'Paul slept with someone else.", + "length": 119 + }, + { + "text": "A mother set herself on fire to teach her cheating husband 'a lesson for having a one-night stand', an inquest has heard.", + "length": 121 + }, + { + "text": "She suffered third degree burns to 70 per cent of her body and died six days after the incident on Sunday, April 28 last year.", + "length": 126 + }, + { + "text": "He added: 'Dianne didn’t want me to leave and said she would kill herself but I never thought she would do anything like this.", + "length": 128 + }, + { + "text": "' Mr Staniforth said he pushed his wife into the bathroom and began running cold water over her body while her daughter, 15, dialled 999.", + "length": 137 + }, + { + "text": "Mr Staniforth said he was woken by the sound of her screaming and leapt out of bed to find her staggering up the stairs ‘like a fireball’.", + "length": 142 + }, + { + "text": "' The police officer then said that Mrs Staniforth told her she had set fire to herself to 'teach her husband a lesson' after finding out he was having 'an affair'.", + "length": 164 + }, + { + "text": "Mrs Staniforth, who worked as a civil servant at the Department of Work and Pensions, was rushed to Sheffield Hospital and then to a burns unit at Pinderfields Hospital, Wakefield, West Yorkshire .", + "length": 197 + }, + { + "text": "' Recording a verdict of misadventure, assistant coroner Paul McCandless said: 'She inflicted these burns upon herself in an effort to get back at her husband, but not in order to bring about her own death.", + "length": 206 + }, + { + "text": "An inquest in Chesterfield, Derbyshire, heard Mrs Staniford 
(pictured here with her husband in happier times) then decided to 'get her own back' The police officer then said that Mrs Staniforth told her she had set fire to herself to 'teach her husband a lesson' after finding out he was having 'an affair' The court heard how Mrs Staniforth had taken an overdose in October 2012.", + "length": 380 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6000899076461792 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:57.596086494Z", + "first_section_created": "2025-12-23T09:32:57.596403406Z", + "last_section_published": "2025-12-23T09:32:57.596611915Z", + "all_results_received": "2025-12-23T09:32:57.66460104Z", + "output_generated": "2025-12-23T09:32:57.664769147Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:57.596403406Z", + "publish_time": "2025-12-23T09:32:57.596611915Z", + "first_worker_start": "2025-12-23T09:32:57.597160637Z", + "last_worker_end": "2025-12-23T09:32:57.663689Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:57.597192738Z", + "start_time": "2025-12-23T09:32:57.59725274Z", + "end_time": "2025-12-23T09:32:57.597332944Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:57.597455Z", + "start_time": "2025-12-23T09:32:57.597584Z", + "end_time": "2025-12-23T09:32:57.663689Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:57.597086734Z", + "start_time": "2025-12-23T09:32:57.597160637Z", + "end_time": "2025-12-23T09:32:57.59724684Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + 
"worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:57.597099534Z", + "start_time": "2025-12-23T09:32:57.597176537Z", + "end_time": "2025-12-23T09:32:57.597204338Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3549, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/001cdbaf0607878f332e0202fadf5b82d2997c02.json b/data/output/001cdbaf0607878f332e0202fadf5b82d2997c02.json new file mode 100644 index 0000000..aeef26d --- /dev/null +++ b/data/output/001cdbaf0607878f332e0202fadf5b82d2997c02.json @@ -0,0 +1,290 @@ +{ + "file_name": "001cdbaf0607878f332e0202fadf5b82d2997c02.txt", + "total_words": 628, + "top_n_words": [ + { + "word": "the", + "count": 46 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "that", + "count": 14 + }, + { + "word": 
"and", + "count": 13 + }, + { + "word": "it", + "count": 10 + }, + { + "word": "s", + "count": 10 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "is", + "count": 9 + }, + { + "word": "for", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "\"Is that fair?", + "length": 14 + }, + { + "text": "That is a massive amount.", + "length": 25 + }, + { + "text": "4-liter V8s on show last year.", + "length": 30 + }, + { + "text": "6-liter V6 engines, compared to the 2.", + "length": 38 + }, + { + "text": "Or it makes you throw something at the TV!", + "length": 42 + }, + { + "text": "No it's not, but it makes exciting racing.", + "length": 42 + }, + { + "text": "\" Interactive: 10 cars that changed Formula One .", + "length": 49 + }, + { + "text": "if he doesn't finish and another guy does he wins.", + "length": 50 + }, + { + "text": "I think the technology will flow to road cars very quickly.", + "length": 59 + }, + { + "text": "So until things settle down I wouldn't want to back anybody.", + "length": 60 + }, + { + "text": "\"How long it will take for other teams to catch up, who knows?", + "length": 62 + }, + { + "text": "\"They are trying to take efficiency from everywhere they can on a car.", + "length": 70 + }, + { + "text": "\"From that point of view, that's what road cars are becoming more and more.", + "length": 75 + }, + { + "text": "Someone could make a modification and gain one second, two seconds per lap.", + "length": 75 + }, + { + "text": "\" This year's race cars will boast an enhanced Energy Recovery System (ERS) and 1.", + "length": 82 + }, + { + "text": "\"I would've thought after the fourth, fifth race, you might see things settle down.", + "length": 83 + }, + { + "text": "\"Wherever there is heat, they turn that into energy,\" added the former Ferrari driver.", + "length": 86 + }, + { + "text": "\"If somebody is quite far ahead and it looks like he's going to win the championship ...", + "length": 88 + }, + { + "text": "\"What they are 
trying to do is make it so the last race determines the championship,\" he said.", + "length": 94 + }, + { + "text": "It's a move that Scheckter thinks will see the fight for the world championship go down to the wire.", + "length": 100 + }, + { + "text": "The ERS uses heat generated when braking and thermal energy from exhaust gases to create extra power.", + "length": 101 + }, + { + "text": "\" On the track, Scheckter expects an unpredictable start to the championship as teams and drivers wrestle with the new regulations.", + "length": 131 + }, + { + "text": "\"It's very important for the global environment that they can make the technology work practically and then it can move into road cars.", + "length": 135 + }, + { + "text": "(CNN) -- The big winners of this Formula One season could be road drivers rather than F1 racers, according to one former world champion.", + "length": 136 + }, + { + "text": "\"If you're going to follow some of the test results then you have to think that Mercedes and Williams have got an advantage at the beginning,\" he said.", + "length": 151 + }, + { + "text": "\"They've taken this energy from the brakes and these different areas, that's what Formula One has done to a much higher degree than I've ever seen before.", + "length": 154 + }, + { + "text": "The Kinetic Energy Recovery System (KERS) has been used in F1 since 2009, but Scheckter says these latest advancements in the sport will only benefit everyday drivers.", + "length": 167 + }, + { + "text": "An encouraging preseason for Mercedes has fueled talk that Lewis Hamilton is the favorite for this weekend's Australian Grand Prix and in pole position to take the title.", + "length": 170 + }, + { + "text": "Hamilton, a world champion in 2008, set the fastest time on the final day of the final test event in Bahrain, but the quickest lap time of preseason was set by Felipe Massa of Williams.", + "length": 185 + }, + { + "text": "\"It's very positive for the sport, this is the first 
time you've seen the sport bring in regulations that really push the envelope of technology for every type of car,\" the South African told CNN.", + "length": 196 + }, + { + "text": "Jody Scheckter, who took the drivers' title in 1979, hopes a raft of technological changes -- notably smaller, hybrid engines that promise greater fuel efficiency -- will help improve road cars' performance.", + "length": 207 + }, + { + "text": "The Brazilian is a new arrival at the British team following nine years with Ferrari and Scheckter expects Massa and Hamilton to start well, but he stopped short of tipping either to be top of the pile at the end of the season.", + "length": 227 + }, + { + "text": "\" The climax of the 2014 season is set to be a dramatic one, with double points set to be awarded to the driver who takes the checkered flag at November's Abu Dhabi Grand Prix, with the winner of that race awarded 50 points, rather than the usual 25.", + "length": 250 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.5899950265884399 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:58.097368186Z", + "first_section_created": "2025-12-23T09:32:58.097645797Z", + "last_section_published": "2025-12-23T09:32:58.097857405Z", + "all_results_received": "2025-12-23T09:32:58.160715425Z", + "output_generated": "2025-12-23T09:32:58.160889932Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:58.097645797Z", + "publish_time": "2025-12-23T09:32:58.097857405Z", + "first_worker_start": "2025-12-23T09:32:58.098412128Z", + "last_worker_end": "2025-12-23T09:32:58.159755Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:58.098423428Z", + "start_time": "2025-12-23T09:32:58.098504031Z", 
+ "end_time": "2025-12-23T09:32:58.098571034Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:58.098602Z", + "start_time": "2025-12-23T09:32:58.098745Z", + "end_time": "2025-12-23T09:32:58.159755Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:58.098396227Z", + "start_time": "2025-12-23T09:32:58.09846713Z", + "end_time": "2025-12-23T09:32:58.098543933Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:58.098319424Z", + "start_time": "2025-12-23T09:32:58.098412128Z", + "end_time": "2025-12-23T09:32:58.098487331Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3460, + 
"slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/001d653a9803a2ecafc1aaa5510b5e9464d1dd75.json b/data/output/001d653a9803a2ecafc1aaa5510b5e9464d1dd75.json new file mode 100644 index 0000000..ca60657 --- /dev/null +++ b/data/output/001d653a9803a2ecafc1aaa5510b5e9464d1dd75.json @@ -0,0 +1,302 @@ +{ + "file_name": "001d653a9803a2ecafc1aaa5510b5e9464d1dd75.txt", + "total_words": 716, + "top_n_words": [ + { + "word": "the", + "count": 57 + }, + { + "word": "to", + "count": 27 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "seals", + "count": 18 + }, + { + "word": "with", + "count": 15 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "as", + "count": 12 + }, + { + "word": "for", + "count": 10 + }, + { + "word": "farne", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "amazing experience.", + "length": 19 + }, + { + "text": "No pictures please!", + "length": 19 + }, + { + "text": "They are so friendly.", + "length": 21 + }, + { + "text": "'The fact that these .", + "length": 22 + }, + { + "text": "Are you looking at me?", + "length": 22 + }, + { + "text": "started to rush into the water.", + "length": 31 + }, + { + "text": "We knew we were on for a good day!", + "length": 34 + }, + { + "text": "to splash water onto us in the boat.", + "length": 36 + }, + { + "text": "'On the first morning of the dive, as we .", + "length": 42 + }, + { + "text": "'They then proceeded to swim out and try .", + "length": 42 + }, + { + "text": "interact with you, it is a special evocative moment.", + "length": 52 + }, + { + "text": "arrived on our boat, the seals that had been lying on the shore all .", + "length": 69 + }, + { + "text": "as much fun with the divers as the divers do with them makes for an .", + "length": 69 + }, + { + "text": "seals not only come close and observe you, but actually play and have .", + "length": 71 + }, + { + "text": "'On this particular trip, I spent 
two days with the seals, which was really magical.", + "length": 84 + }, + { + "text": "Hotspot: The Farne Islands rookery of seals is thought to be several thousand in size .", + "length": 87 + }, + { + "text": "The long-whiskered water lover is one of thousands splashing around off the British coastland .", + "length": 95 + }, + { + "text": "At first sight, the animal appears to be far from friendly as its shows off a set of terrifying teeth .", + "length": 103 + }, + { + "text": "'There are several thousand seals there, and I've been going to photograph them there for the last 10 years.", + "length": 108 + }, + { + "text": "Intrigued: This wide-eyed seal was keen to find out more about its human visitor and swam up for a closer look .", + "length": 112 + }, + { + "text": "Frolicking: The adorable seals touch noses as they play among the seaweed while photographer Alan Hanlon watches on .", + "length": 117 + }, + { + "text": "Wildlife snapper Adam said: 'The rookery of seals at the Farne Islands is well known as an amazing site for seal interactions.", + "length": 126 + }, + { + "text": "Poser: The marine mammal puts away its teeth and shows a more gentle side when it swims up to the lens and peers into the glass .", + "length": 129 + }, + { + "text": "Under the sea: Wildlife photographer Alan Hanlon has been taking pictures of the Farne Islands rookery of seals for the last 10 years .", + "length": 135 + }, + { + "text": "Unique: The Farne Islands seals are unique in that they have never been fed or have any other reason to play with humans - but they do .", + "length": 136 + }, + { + "text": "Say cheese: The scary-looking seal bears its sharp teeth as it snarls at the camera during a hairy introduction with an underwater visitor .", + "length": 140 + }, + { + "text": "Smile for the camera: A seal attempts to take a bite out of a wildlife photographer's camera in the waters off Farne Islands, Northumberland .", + "length": 142 + }, + { + "text": "'Magical': Alan 
Hanlon spent two days swimming with the friendly seals and taking pictures of his sea hosts during his latest visit to Farne Islands .", + "length": 150 + }, + { + "text": "The wonderful pictures were taken in the waters near the Farne Islands, Northumberland, which is a renowned spot for anyone looking to swim side-by-side with seals.", + "length": 164 + }, + { + "text": "'The Farnes seals are unique in that they have never been fed, or have any other reason to play with humans, yet they choose to come out and play with visiting divers.", + "length": 167 + }, + { + "text": "The friendly pair of seals were more than happy to pose for the camera, repeatedly swimming up to the lens, peering into the glass and pawing at photographer Adam Hanlon.", + "length": 170 + }, + { + "text": "The stunning set of images of the frolicking marine mammals were captured in British waters by a diver who did well to hang onto his camera when one of the seals attempted to take a bite out it.", + "length": 194 + }, + { + "text": "These terrifying teeth should be enough to scare off even the bravest wildlife photographer - but those divers who persevere are rewarded with a unique encounter with an adorable group of playful seals.", + "length": 202 + }, + { + "text": "The animal appeared to be growling as he showed off his gnarling gnashers to the camera, but within seconds of this hairy introduction it was having a whale of a time splashing around with the underwater visitor.", + "length": 212 + }, + { + "text": "Amazing experience: 'These seals not only come close and observe you, but actually play and have as much fun as the divers do' A good day: 'Anytime a wild animal chooses to interact with you, it is a special evocative moment' 'Anytime a wild animal chooses to .", + "length": 261 + }, + { + "text": "' 'On the first morning of the dive, as we arrived on our boat, the seals that had been lying on the shore all started to rush into the water' 'The rookery of seals at the Farne 
Islands is well known as an amazing site for seal interactions' Scratch the surface: Divers flock to the Farne Islands regularly to take up the unique opportunity to interact with nature .", + "length": 366 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.47526177763938904 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:58.598652397Z", + "first_section_created": "2025-12-23T09:32:58.598979911Z", + "last_section_published": "2025-12-23T09:32:58.599219121Z", + "all_results_received": "2025-12-23T09:32:58.665100743Z", + "output_generated": "2025-12-23T09:32:58.665261949Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:58.598979911Z", + "publish_time": "2025-12-23T09:32:58.599219121Z", + "first_worker_start": "2025-12-23T09:32:58.59969704Z", + "last_worker_end": "2025-12-23T09:32:58.664085Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:58.59969224Z", + "start_time": "2025-12-23T09:32:58.599771943Z", + "end_time": "2025-12-23T09:32:58.599845646Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:58.599991Z", + "start_time": "2025-12-23T09:32:58.600133Z", + "end_time": "2025-12-23T09:32:58.664085Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:58.599760543Z", + "start_time": "2025-12-23T09:32:58.599821746Z", + "end_time": "2025-12-23T09:32:58.59992205Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:58.599623237Z", + "start_time": "2025-12-23T09:32:58.59969704Z", + "end_time": 
"2025-12-23T09:32:58.599739842Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3983, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/001d9259673bd2ffb613217d19b98ca3563874ac.json b/data/output/001d9259673bd2ffb613217d19b98ca3563874ac.json new file mode 100644 index 0000000..ac9179e --- /dev/null +++ b/data/output/001d9259673bd2ffb613217d19b98ca3563874ac.json @@ -0,0 +1,210 @@ +{ + "file_name": "001d9259673bd2ffb613217d19b98ca3563874ac.txt", + "total_words": 449, + "top_n_words": [ + { + "word": "the", + "count": 22 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "to", + "count": 9 + }, + { + "word": "uefa", + "count": 9 + }, + { + "word": "be", + "count": 6 + }, + { + 
"word": "football", + "count": 6 + }, + { + "word": "euro", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "The closing date for bids is April 25 with UEFA's Executive Committee announcing the host cities on September 25 next year.", + "length": 123 + }, + { + "text": "\" Less illustrious footballing nations including Armenia, Israel, Kazakhstan and Wales have also thrown their hat into the ring.", + "length": 128 + }, + { + "text": "Traditionally, the tournament is hosted by one or two nations, but matches in 2020 will be shared between 13 cities across Europe.", + "length": 130 + }, + { + "text": "UEFA, who announced a change in format for the tournament's 60th anniversary last December, welcomed the enthusiastic response from the national football associations.", + "length": 167 + }, + { + "text": "(CNN) -- European football's governing body, UEFA has revealed that 32 of its 54 member states have declared an interest in hosting matches at the 2020 European Championships.", + "length": 175 + }, + { + "text": "UEFA says all 32 associations can submit a maximum of two bids -- one which covers three group matches and one knockout round and another which will vie to host the semifinal and final.", + "length": 185 + }, + { + "text": "\"UEFA's 'EURO for Europe' in 2020 promises to be a fitting way to recognise 60 years of the UEFA European Championship,\" said England Football Association secretary Alex Horne in a statement.", + "length": 191 + }, + { + "text": "\"The finals will be a great celebration of football across the European continent, and the 60th anniversary edition will be truly special, by really coming to the doorstep of all football fans.", + "length": 193 + }, + { + "text": "\"We are extremely proud to see the huge interest in the bidding process, with more than half of our member associations willing to host matches at UEFA EURO 2020,\" UEFA President Michel Platini said.", + "length": 199 + }, + { + "text": "\" Platini initially floated 
the idea following the 2012 championships hosted by Poland and Ukraine and a decision to change the format was agreed by UEFA's Executive Committee last December before being confirmed in January.", + "length": 224 + }, + { + "text": "The format, dubbed a \"Euro for Europe,\" has attracted interest from reigning Euro champions, Spain and the other traditional powerhouses of European football -- Germany, France, Italy, Netherlands, Portugal, Greece and England.", + "length": 227 + }, + { + "text": "\"It would be great to see England playing in front of their home fans here in London as part of a EURO Finals tournament but many countries have also put themselves forward as hosts and we expect this to be a very competitive bidding process.", + "length": 242 + }, + { + "text": "The full list of countries and their proposed host cities is as follows: Armenia (Yerevan), Azerbaijan (Baku), Belarus (Minsk), Belgium (Brussels), Bulgaria (Sofia), Croatia (Zagreb), Czech Republic (Prague), Denmark (Copenhagen), England (London), Finland (Helsinki), France (Lyon), Former Yugoslav Republic of Macedonia (Skopje), Germany (Munich), Greece (Athens), Hungary (Budapest), Israel (Jerusalem), Italy (Rome, Milan), Kazakhstan (Astana), Netherlands (Amsterdam), Poland (Warsaw, Chorzow), Portugal (Lisbon, Porto), Republic of Ireland (Dublin), Romania (Bucharest), Russia (St Petersburg), Scotland (Glasgow), Serbia (Belgrade), Spain (Madrid, Barcelona, Bilbao, Valencia), Sweden (Solna), Switzerland (Basel), Turkey (Istanbul), Ukraine (Kyiv, Donetsk) and Wales (Cardiff).", + "length": 785 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.5214298367500305 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:59.100368326Z", + "first_section_created": "2025-12-23T09:32:59.100673938Z", + "last_section_published": "2025-12-23T09:32:59.100908448Z", + "all_results_received": "2025-12-23T09:32:59.16195557Z", + "output_generated": 
"2025-12-23T09:32:59.162140078Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:59.100673938Z", + "publish_time": "2025-12-23T09:32:59.100908448Z", + "first_worker_start": "2025-12-23T09:32:59.101481972Z", + "last_worker_end": "2025-12-23T09:32:59.160941Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:59.101417969Z", + "start_time": "2025-12-23T09:32:59.101487372Z", + "end_time": "2025-12-23T09:32:59.101530774Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:59.101643Z", + "start_time": "2025-12-23T09:32:59.101833Z", + "end_time": "2025-12-23T09:32:59.160941Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:59.101418569Z", + "start_time": "2025-12-23T09:32:59.101486172Z", + "end_time": "2025-12-23T09:32:59.101559275Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:59.101401769Z", + "start_time": "2025-12-23T09:32:59.101481972Z", + "end_time": "2025-12-23T09:32:59.101547675Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 
59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2979, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/001dc91a62bad5263259a1d664c821cb1e700f27.json b/data/output/001dc91a62bad5263259a1d664c821cb1e700f27.json new file mode 100644 index 0000000..e5976af --- /dev/null +++ b/data/output/001dc91a62bad5263259a1d664c821cb1e700f27.json @@ -0,0 +1,258 @@ +{ + "file_name": "001dc91a62bad5263259a1d664c821cb1e700f27.txt", + "total_words": 427, + "top_n_words": [ + { + "word": "to", + "count": 21 + }, + { + "word": "the", + "count": 18 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "was", + "count": 11 + }, + { + "word": "an", + "count": 8 + }, + { + "word": "jones", + "count": 8 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "he", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "'Mr.", + "length": 4 + }, + { + "text": ", a Daytona Beach native.", + "length": 25 + }, + { + "text": "He also was an amateur athlete.", + "length": 31 + }, + { + "text": "to his death with a phony Craigslist ad .", + "length": 41 + }, + { + "text": "' Jones had been shot while still in his car.", + "length": 45 + }, + { + "text": "Craiglist has yet to respond to reporters questions.", + "length": 52 + }, + { + "text": "He was looking to buy an iPhone from a seller on 
Craigslist.", + "length": 60 + }, + { + "text": "Police were dispatched to respond to a motor vehicle accident.", + "length": 62 + }, + { + "text": "After he was killed, the car rolled forward into another car and a tree.", + "length": 72 + }, + { + "text": "'This is a devastating loss for the Clark Atlanta University family,'' a statement read.", + "length": 88 + }, + { + "text": "Jordan Baker, 18, also allegedly acted in the plot to murder Jones with the lure of an iPhone .", + "length": 95 + }, + { + "text": "'The ad was posted in the hopes to lure somebody in, but obviously the victim didn't know that.", + "length": 95 + }, + { + "text": "'It was a heck of an undertaking and investigation but it helps to bring closure to the family.", + "length": 95 + }, + { + "text": "Police in suburban Atlanta say three people have been arrested in the death of 21-year-old James Jones Jr.", + "length": 106 + }, + { + "text": "Kaylnn Ruthenberg, 21, (left) Jonathon Myles, 19, (right) have been arrested in the plot to lure Jones Jr.", + "length": 106 + }, + { + "text": "Police in suburban Atlanta say three people have been arrested in the death of 21-year-old James Jones Jr.", + "length": 106 + }, + { + "text": "We hope to give them peace to know that everyone involved in this senseless murder will be held accountable, Baldwin said.", + "length": 122 + }, + { + "text": "Marietta Police spokesman David Baldwin says Jones was robbed and fatally shot Monday night after responding to an ad for an iPhone 6.", + "length": 134 + }, + { + "text": "'Through the course of the investigation, they discovered that he was going to that area in response to a Craigslist ad,' Baldwin said.", + "length": 135 + }, + { + "text": "' 'It was an illegitimate ad he was responding to and he did not know the sellers,' Marietta Police officer David Baldwin told ABC News.", + "length": 136 + }, + { + "text": "Clark Atlanta said in an email to students that it's reaching out to family and friends of Jones 
before finalizing any memorial tributes.", + "length": 137 + }, + { + "text": "(pictured) Clark Atlanta University says the killing of a junior chemistry major from Central Florida is a 'devastating' loss for the school.", + "length": 141 + }, + { + "text": "In addition to his studies, he was a UPS scholar at Fickett Elementary School and a science mentor for the HBCU Rising Cares Mentoring program at Brown Middle School.", + "length": 166 + }, + { + "text": "Baldwin says 18-year-old Jordan Baker, 19-year-old Jonathon Myles and 21-year-old Kaylnn Ruthenberg are jailed on felony murder, aggravated assault and armed robbery charges.", + "length": 174 + }, + { + "text": "Jones was a serious scholar and a kind, engaging spirit whose work and comportment spoke well of the university's core values and those instilled long ago by those contributing to his upbringing.", + "length": 195 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.803150475025177 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:32:59.601747541Z", + "first_section_created": "2025-12-23T09:32:59.602031052Z", + "last_section_published": "2025-12-23T09:32:59.602293963Z", + "all_results_received": "2025-12-23T09:32:59.667435354Z", + "output_generated": "2025-12-23T09:32:59.667609062Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:32:59.602031052Z", + "publish_time": "2025-12-23T09:32:59.602293963Z", + "first_worker_start": "2025-12-23T09:32:59.602788784Z", + "last_worker_end": "2025-12-23T09:32:59.666451Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:32:59.602812185Z", + "start_time": "2025-12-23T09:32:59.602861587Z", + "end_time": "2025-12-23T09:32:59.602921389Z", + "queue_wait_time_ms": 0, 
+ "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:32:59.603078Z", + "start_time": "2025-12-23T09:32:59.603219Z", + "end_time": "2025-12-23T09:32:59.666451Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:32:59.602767583Z", + "start_time": "2025-12-23T09:32:59.602826285Z", + "end_time": "2025-12-23T09:32:59.602897088Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:32:59.602737481Z", + "start_time": "2025-12-23T09:32:59.602788784Z", + "end_time": "2025-12-23T09:32:59.602807484Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2444, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git 
a/data/output/001df6d074f775df5545154e0072e6edd826d796.json b/data/output/001df6d074f775df5545154e0072e6edd826d796.json new file mode 100644 index 0000000..f7833f1 --- /dev/null +++ b/data/output/001df6d074f775df5545154e0072e6edd826d796.json @@ -0,0 +1,306 @@ +{ + "file_name": "001df6d074f775df5545154e0072e6edd826d796.txt", + "total_words": 855, + "top_n_words": [ + { + "word": "the", + "count": 41 + }, + { + "word": "to", + "count": 29 + }, + { + "word": "her", + "count": 24 + }, + { + "word": "a", + "count": 21 + }, + { + "word": "of", + "count": 21 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "for", + "count": 17 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "was", + "count": 12 + }, + { + "word": "christmas", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "She will always be the Oxo mum xxx'.", + "length": 36 + }, + { + "text": "' ITV has been contacted for comment.", + "length": 37 + }, + { + "text": "'No decision has been made yet,' he said.", + "length": 41 + }, + { + "text": "'The work was great fun to create,' he said.", + "length": 44 + }, + { + "text": "One, Anne Sheridan, wrote: 'Let Linda have her Christmas.", + "length": 57 + }, + { + "text": "'The adverts can be found on YouTube for anyone who would like to revisit them.", + "length": 79 + }, + { + "text": "The spokesman explained: 'Lynda was a fabulous actress with a career spanning many years.", + "length": 89 + }, + { + "text": "Fans of the actress lined up to pay heartfelt tributes after her death on Sunday, October 19.", + "length": 93 + }, + { + "text": "Beloved: A spokesman for Oxo said no decision had been made yet after a campaign by 150,000 fans .", + "length": 98 + }, + { + "text": "Brave face: In her final TV interview the actress said she was looking forward to a family Christmas .", + "length": 102 + }, + { + "text": "Fitting tribute: Artist Nathan Wyburn created this portrait in Lynda 
Bellingham's memory using Oxo cubes .", + "length": 106 + }, + { + "text": "Stresses and strains: The actress played a mother dealing with all the noise and chaos of a family Christmas .", + "length": 110 + }, + { + "text": "She was diagnosed with colon cancer only in July this year, and the disease later spread to her lungs and liver.", + "length": 112 + }, + { + "text": "The Prince of Wales said he was 'greatly saddened' by her death from cancer, praising her 'tireless' work for other people.", + "length": 123 + }, + { + "text": "'I wanted to create a fitting and respectful tribute to Lynda, and thought it was only appropriate to use Oxo cubes,' he said.", + "length": 126 + }, + { + "text": "'I love using food as an art medium, there’s so many colours, textures and even smells that make the portraits quite unique.", + "length": 126 + }, + { + "text": "She was also a presenter on Loose Women, where her final interview attracted an audience of 2million - double its usual figure.", + "length": 127 + }, + { + "text": "'So we're going to go to a hotel for the meal, but just my little bit of control, we're going to go home for pudding and presents.", + "length": 130 + }, + { + "text": "The work by Mr Wyburn, whose specialises in creating celebrity images using food and household items, took around two hours to complete.", + "length": 136 + }, + { + "text": "' A spokesman for Premier Foods, which runs the Oxo brand, said it was a 'highly sensitive issue' and the firm is 'reviewing the situation'.", + "length": 140 + }, + { + "text": "Mrs Bellingham was best known for playing mother in the Oxo Christmas adverts from 1983 to 1999, as each year her family grew up around her.", + "length": 140 + }, + { + "text": "'I've succumbed to not cooking it and I thought poor Michael [her husband], we can't put him through that again,' she told ITV's Loose Women.", + "length": 141 + }, + { + "text": "Among those remembering her were Prince Charles - who said she was a 'marvellous 
actress' and he backed her push to get more older people into work.", + "length": 148 + }, + { + "text": "Another, Jan Fravigar, added: 'It would be lovely, but what about her family how would they feel seeing her at Christmas, for me would be very hard.", + "length": 149 + }, + { + "text": "A food artist has created a portrait of the late Lynda Bellingham entirely out of Oxo cubes - the food she spent more than 15 years advertising on TV.", + "length": 150 + }, + { + "text": "For two years she had been an ambassador for the Prince's Initiative for Mature Enterprise (Prime) which helps the over-50s start their own companies.", + "length": 150 + }, + { + "text": "Other pieces of work by the artist have included portraits of William and Kate made out of crackers, and images of Joey Essex and Katie Price painted in fake tan.", + "length": 162 + }, + { + "text": "More than 150,000 Lynda Bellingham fans have a joined a campaign to bring back her classic Oxo TV advert - to pay tribute to her wish for one last family Christmas.", + "length": 164 + }, + { + "text": "Some fans of the 66-year-old actress have called on ITV to screen the advert free of charge while others have taking to donating to Cancer Research UK in her memory.", + "length": 165 + }, + { + "text": "Fine art graduate Nathan Wyburn, 25, from Abergavenny, Monmouthshire, created the tribute by putting beef stock cubes in water and painting the mixture onto a canvas.", + "length": 166 + }, + { + "text": "'She will always be remembered as the OXO mum but we don't wish to overshadow all the other inspiring things she did with her life and career by focusing solely on our adverts.", + "length": 176 + }, + { + "text": "The stock cube firm is considering whether to run the 1984 clip My Christmas on December 25 in memory of the actress, whose death prompted an outpouring of tributes last week.", + "length": 176 + }, + { + "text": "In her final TV interview she told of her excitement at spending her last Christmas 
with her family - ditching the apron of her screen appearances for a napkin in a nearby hotel.", + "length": 178 + }, + { + "text": "' The Facebook campaign group, set up after the actress lost her battle with colon cancer, was called 'Bring back the Oxo Christmas advert in memory of Lynda Bellingham' and has gained more than 150,000 fans.", + "length": 208 + }, + { + "text": "Final Christmas: Oxo is 'reviewing' whether to air the 1984 TV advert My Christmas (pictured) in memory of the late actress Lynda Bellingham, who played the much-loved mother of the family for more than 15 years .", + "length": 213 + }, + { + "text": "'For the past two years we have partnered with MacMillan Cancer Support, one of the charities supported by Lynda, to raise more than £250,000 to help families affected by cancer and we will continue to support this valuable cause.", + "length": 231 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.37638285756111145 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:00.103119355Z", + "first_section_created": "2025-12-23T09:33:00.103462569Z", + "last_section_published": "2025-12-23T09:33:00.103737981Z", + "all_results_received": "2025-12-23T09:33:00.164915808Z", + "output_generated": "2025-12-23T09:33:00.165134417Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:00.103462569Z", + "publish_time": "2025-12-23T09:33:00.103737981Z", + "first_worker_start": "2025-12-23T09:33:00.104112896Z", + "last_worker_end": "2025-12-23T09:33:00.163827Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:00.104255502Z", + "start_time": "2025-12-23T09:33:00.104332105Z", + "end_time": "2025-12-23T09:33:00.104413808Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:00.104496Z", + "start_time": "2025-12-23T09:33:00.104648Z", + "end_time": "2025-12-23T09:33:00.163827Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:00.104094495Z", + "start_time": "2025-12-23T09:33:00.104183199Z", + "end_time": "2025-12-23T09:33:00.104342706Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:00.104041493Z", + "start_time": "2025-12-23T09:33:00.104112896Z", + "end_time": "2025-12-23T09:33:00.104225301Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4798, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git 
a/data/output/001e07d8ee784776414f7fd08979971f08c03f24.json b/data/output/001e07d8ee784776414f7fd08979971f08c03f24.json new file mode 100644 index 0000000..544f1d6 --- /dev/null +++ b/data/output/001e07d8ee784776414f7fd08979971f08c03f24.json @@ -0,0 +1,372 @@ +{ + "file_name": "001e07d8ee784776414f7fd08979971f08c03f24.txt", + "total_words": 878, + "top_n_words": [ + { + "word": "of", + "count": 47 + }, + { + "word": "the", + "count": 44 + }, + { + "word": "to", + "count": 33 + }, + { + "word": "and", + "count": 29 + }, + { + "word": "their", + "count": 24 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "them", + "count": 14 + }, + { + "word": "that", + "count": 13 + }, + { + "word": "was", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "'Some .", + "length": 7 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Lucy Crossley .", + "length": 15 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "05:40 EST, 18 November 2013 .", + "length": 29 + }, + { + "text": "07:50 EST, 18 November 2013 .", + "length": 29 + }, + { + "text": "'For them, the thought of letting me .", + "length": 38 + }, + { + "text": "lives,' said the London-based photographer.", + "length": 43 + }, + { + "text": "them bring down their barriers letting them help me to help them.", + "length": 65 + }, + { + "text": "of the people I photographed were deeply ashamed of their condition and .", + "length": 73 + }, + { + "text": "the state of their homes, and even had respectable jobs and good social .", + "length": 73 + }, + { + "text": "into their home was unbearable, but with some of them, I managed to help .", + "length": 74 + }, + { + "text": "'One of even refused to believe that that the pictures I was showing them was their house!", + "length": 90 + }, + { + 
"text": "Rubbish: A pile of rubbish and old food wrappers coat the kitchen work tops in one hoarders' home .", + "length": 99 + }, + { + "text": "Squalor: The collection of hair had spread to the bathroom, along with bags filled with paper and rubbish .", + "length": 107 + }, + { + "text": "Haul: A mountain of video cassettes, books, and even an old fan fill this room, almost from floor to ceiling .", + "length": 110 + }, + { + "text": "Mystery object: This strange item is unrecognisable, and yet one hoarder could not bring themselves to throw it away .", + "length": 118 + }, + { + "text": "Can't cook won't cook: Any food preparation would be impossible on this stove top, covered in piles of pots and pans .", + "length": 118 + }, + { + "text": "Forced out: Sometimes, the hoarders' collections had got so bad, they could no longer live in their home and had to move .", + "length": 122 + }, + { + "text": "'While some of the hoarders were ashamed of their homes, others were in denial and their clutter became invisible to them.", + "length": 122 + }, + { + "text": "'I’m grateful that they let me into their homes and the experience really opened my eyes to hoarding as a medical condition.", + "length": 126 + }, + { + "text": "Hair-raising: Among the more unusual, and stomach-churning, items discovered by Ms Salischicker was a collection of human hair .", + "length": 128 + }, + { + "text": "'I just hope that my photographs helped them see the extent of their hoarding and will help them conquer, or remedy their condition.", + "length": 132 + }, + { + "text": "A space to sit: One man sits in his chair, amidst a sea of old newspapers, discarded supermarket bags and numerous pairs of trainers .", + "length": 134 + }, + { + "text": "hoarder's activities have not been confined to the inside of their home, with a collection of twigs kept on top of some garden chairs .", + "length": 135 + }, + { + "text": "'Sometimes, the hoarders' collections had got so bad, that they could no 
longer live in their home and had to live elsewhere,' said Ms Salischiker.", + "length": 147 + }, + { + "text": "'When something broke, say the cooker, or the washing machine, they were that ashamed of letting someone in that whatever was broken would stay that way.", + "length": 153 + }, + { + "text": "Clutter: Photographer Paula Salischiker was invited into the homes of hoarders living in London and Essex in an attempt to help cure them of their condition .", + "length": 158 + }, + { + "text": "' The shocking series of images, taken by the Argentinian-born photographer, reveal some very unusual collections including, human hair, rotting food and twigs.", + "length": 160 + }, + { + "text": "'It was only when I showed them my photographs, that it opened their eyes to what was staring right at them, but initially they couldn’t see the wood through the trees.", + "length": 170 + }, + { + "text": "The group, which meets in London, is the first of its kind and gives free help and advice to members from around the country, who are trying to battle their addiction to hoarding.", + "length": 179 + }, + { + "text": "Ray of light: The homes pictured belong to members of a support group, which is the first of its kind and gives free help and advice to members trying to battle their addiction to collecting .", + "length": 192 + }, + { + "text": "Ms Salischiker said: 'The things some of the hoarders collected was unbelievable and I could never think of a use for a lot of their belongings - obviously to them, everything had it’s place.", + "length": 193 + }, + { + "text": "Kitchen surfaces piled high with pots and pans, plastic bags filled with old newspapers and even a collection of human hair are just some of the thousands of items which fill these hoarders' homes.", + "length": 197 + }, + { + "text": "London-based Ms Salischicker said that many of the hoarders, who live in London and Essex, had initially found it hard to let her inside, ashamed at the piles of rubbish they 
had amassed over the years.", + "length": 202 + }, + { + "text": "The secret life of the hoarders, who can not bear to throw the objects away, was unveiled when a number of hoarding support group members invited a photographer into their homes in a bid to shock them out of the condition.", + "length": 222 + }, + { + "text": "Piled high: Barely any space remains in this squalid kitchen, while another hoarder's bed was hidden with plastic bags, containing, among other items, newspapers, a Father Christmas hat and a box of Celebrations chocolates .", + "length": 224 + }, + { + "text": "' Earlier this year, hoarding was defined as a separate disorder from OCD in the new Diagnostic and Statistical Manual of Mental Disorders, meaning that until recently, the treatments offered have not been entirely appropriate.", + "length": 228 + }, + { + "text": "For many of the group, who meet regularly to seek help and advice, their addiction to hoarding had become so extreme that they had no choice but to leave their squalid homes behind, unable to live among the floor-to-ceiling stacks of newspapers, dirty clothes and shopping bags.", + "length": 278 + }, + { + "text": "Respectable: Ms Salischiker said that some of the homes she photographed belonged to people with 'respectable jobs' who were 'deeply ashamed of their condition' Photographer Paula Salischicker was asked by the group members to take pictures of their homes as part of an exercise to show the true extent of their 'collections'.", + "length": 326 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5604441165924072 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:00.604535071Z", + "first_section_created": "2025-12-23T09:33:00.604818383Z", + "last_section_published": "2025-12-23T09:33:00.605211299Z", + "all_results_received": "2025-12-23T09:33:00.707352219Z", + "output_generated": "2025-12-23T09:33:00.707596029Z", + "total_processing_time_ms": 103, + 
"file_splitting_time_ms": 0, + "workers_processing_time_ms": 102, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:00.604818383Z", + "publish_time": "2025-12-23T09:33:00.605088694Z", + "first_worker_start": "2025-12-23T09:33:00.605620216Z", + "last_worker_end": "2025-12-23T09:33:00.706359Z", + "total_journey_time_ms": 101, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:00.60570752Z", + "start_time": "2025-12-23T09:33:00.605752822Z", + "end_time": "2025-12-23T09:33:00.605848726Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:00.605867Z", + "start_time": "2025-12-23T09:33:00.606025Z", + "end_time": "2025-12-23T09:33:00.706359Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 100 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:00.605894627Z", + "start_time": "2025-12-23T09:33:00.606002232Z", + "end_time": "2025-12-23T09:33:00.60620654Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:00.605552613Z", + "start_time": "2025-12-23T09:33:00.605620216Z", + "end_time": "2025-12-23T09:33:00.605701819Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:00.605127196Z", + "publish_time": "2025-12-23T09:33:00.605211299Z", + "first_worker_start": "2025-12-23T09:33:00.605650017Z", + "last_worker_end": "2025-12-23T09:33:00.655524Z", + "total_journey_time_ms": 50, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:00.605699919Z", + "start_time": "2025-12-23T09:33:00.605740121Z", + "end_time": "2025-12-23T09:33:00.605745921Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { 
+ "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:00.605971Z", + "start_time": "2025-12-23T09:33:00.60609Z", + "end_time": "2025-12-23T09:33:00.655524Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 49 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:00.605702619Z", + "start_time": "2025-12-23T09:33:00.605752222Z", + "end_time": "2025-12-23T09:33:00.605759322Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:00.605615616Z", + "start_time": "2025-12-23T09:33:00.605650017Z", + "end_time": "2025-12-23T09:33:00.605652617Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 149, + "min_processing_ms": 49, + "max_processing_ms": 100, + "avg_processing_ms": 74, + "median_processing_ms": 100, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2565, + "slowest_section_id": 0, + "slowest_section_time_ms": 101 + } +} diff --git 
a/data/output/001e32d87f76d9c2707b59d2d466a9494a9d671c.json b/data/output/001e32d87f76d9c2707b59d2d466a9494a9d671c.json new file mode 100644 index 0000000..8892e18 --- /dev/null +++ b/data/output/001e32d87f76d9c2707b59d2d466a9494a9d671c.json @@ -0,0 +1,404 @@ +{ + "file_name": "001e32d87f76d9c2707b59d2d466a9494a9d671c.txt", + "total_words": 941, + "top_n_words": [ + { + "word": "the", + "count": 55 + }, + { + "word": "a", + "count": 28 + }, + { + "word": "it", + "count": 22 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "s", + "count": 20 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "earth", + "count": 19 + }, + { + "word": "cruithne", + "count": 16 + }, + { + "word": "in", + "count": 15 + } + ], + "sorted_sentences": [ + { + "text": "Quite a lot.", + "length": 12 + }, + { + "text": "to pastures new.", + "length": 16 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "The story doesn't end there.", + "length": 28 + }, + { + "text": "We all know and love the moon.", + "length": 30 + }, + { + "text": "From our viewpoint, the Earth looks stationary.", + "length": 47 + }, + { + "text": "It has an elliptical orbit and takes around 364.", + "length": 48 + }, + { + "text": "This article originally appeared in The Conversation.", + "length": 53 + }, + { + "text": "But our planet’s faithful companion may not be alone.", + "length": 55 + }, + { + "text": "But the moon is not the Earth's only natural satellite.", + "length": 55 + }, + { + "text": "So what can we learn about the solar system from Cruithne?", + "length": 58 + }, + { + "text": "A representation of Cruithne's strange orbit around the sun.", + "length": 61 + }, + { + "text": "Horseshoe orbits are quite common for moons in the solar system.", + "length": 64 + }, + { + "text": "Saturn has a couple of moons in this configuration, for instance.", + "length": 65 + }, + { + "text": "The moon has been orbiting Earth for more than 
four billion years.", + "length": 66 + }, + { + "text": "The yellow orbit in the animation shows Cruithne's orbit around Earth .", + "length": 71 + }, + { + "text": "The point where it is predicted to get closest is about 2,750 years away.", + "length": 73 + }, + { + "text": "Cruithne, a 3 mile (5km) asteroid (animations shown above) is shown here.", + "length": 73 + }, + { + "text": "What's unique about Cruithne is how it wobbles and sways along its horseshoe.", + "length": 77 + }, + { + "text": "We're so assured that we only have one that we don't even give it a specific name.", + "length": 82 + }, + { + "text": "Cruithne scuttles around the inner solar system in what's called a 'horseshoe' orbit.", + "length": 85 + }, + { + "text": "92 days to circle the sun once, meaning the asteroid and Earth are in ‘resonant orbits’.", + "length": 92 + }, + { + "text": "A new object, dubbed 2014 OL339, has been found masquerading as Earth’s ‘second moon’.", + "length": 92 + }, + { + "text": "Instead, Cruithne scuttles around the inner solar system in what's called a 'horseshoe' orbit.", + "length": 94 + }, + { + "text": "Cruithne is expected to undergo a rather close encounter with Venus in about 8,000 years, however.", + "length": 98 + }, + { + "text": "A body on a simple horseshoe orbit around the Earth moves toward it, then turns round and moves away.", + "length": 101 + }, + { + "text": "The fact they do shows us that such interactions will have occurred while the solar system was forming.", + "length": 103 + }, + { + "text": "Once it's moved so far away it's approaching Earth from the other side, it turns around and moves away again.", + "length": 109 + }, + { + "text": "Like the many other asteroids and comets, it contains forensic evidence about how the planets were assembled.", + "length": 109 + }, + { + "text": "Its kooky orbit is an ideal testing ground for our understanding of how the solar system evolves under gravity.", + "length": 111 + }, + { + "text": "Now 
researchers believe its strange orbit may help scientists better understand how gravity helped planets form.", + "length": 112 + }, + { + "text": "This could provide an ideal testing ground for our understanding of how the solar system evolves under gravity .", + "length": 112 + }, + { + "text": "Cruithne and these other satellites teach us that the solar system isn't eternal – and by extension, neither are we.", + "length": 118 + }, + { + "text": "As recently as 1997, we discovered that another body, 3753 Cruithne, is what's called a quasi-orbital satellite of Earth.", + "length": 121 + }, + { + "text": "Because we think terrestrial planets grow via collisions of bodies of Cruithne-size and above, this is a big new variable.", + "length": 122 + }, + { + "text": "The peculiar object was accidentally discovered on July 29 by astronomer Farid Char of the Chilean University of Antofagasta.", + "length": 125 + }, + { + "text": "This happens when two orbiting bodies exert a gravitational influence on each other due to their orbits being closely related.", + "length": 126 + }, + { + "text": "Like a good foster home, the Earth plays host to many wayward lumps of rock looking for a gravitational well to hang around near.", + "length": 129 + }, + { + "text": "Asteroid 2014 OL339 orbits the sun in a similar timeframe to Earth, but our planet’s gravity pushes it into an eccentric wobble.", + "length": 130 + }, + { + "text": "There's a good chance that that will put paid to our erstwhile spare moon, flinging it out of harm's way, and out of the Terran family.", + "length": 135 + }, + { + "text": "It was written by Dr Duncan Forgan who is a postdoctoral research fellow at the University of St Andrews’ School of Physics and Astronomy.", + "length": 140 + }, + { + "text": "Cruithne doesn't loop around the Earth in a nice ellipse in the same way as the moon, or indeed the artificial satellites we loft into orbit.", + "length": 141 + }, + { + "text": "It wasn't until the end of the 
20th century that we even realised that bodies would enter such weird horseshoe orbits and stay there for such a long time.", + "length": 154 + }, + { + "text": "The new ‘moon’, which is in fact a 490ft (150 metre) asteroid, takes about a year to orbit the sun and is close enough to Earth to look like its satellite.", + "length": 159 + }, + { + "text": "To help understand why it's called a horseshoe orbit, imagine we're looking down at the solar system, rotating at the same rate as the Earth goes round the sun.", + "length": 160 + }, + { + "text": "If Cruithne struck the Earth, though, that would be an extinction-level event, similar to what is believed to have occurred at the end of the Cretaceous period.", + "length": 160 + }, + { + "text": "One day, Cruithne could be a practice site for landing humans on asteroids, and perhaps even mining them for the rare-earth metals our new technologies desperately crave.", + "length": 170 + }, + { + "text": "If you look at Cruithne's motion in the solar system, it makes a messy ring around Earth's orbit, swinging so wide that it comes into the neighbourhood of both Venus and Mars.", + "length": 175 + }, + { + "text": "According to Rebecca Boyle writing in New Scientist, 2014 OL339 has been travelling near to our planet for about 775 years and it will continue to do so for another 165 years.", + "length": 175 + }, + { + "text": "If you look at Cruithne's motion in the solar system, it makes a messy ring around Earth's orbit, swinging so wide that it comes into the neighbourhood of both Venus (pictured) and Mars .", + "length": 187 + }, + { + "text": "Luckily it's not going to hit us anytime soon – its orbit is tilted out of the plane of the solar system, and astrophysicists have shown using simulations that while it can come quite close, it is extremely unlikely to hit us.", + "length": 228 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5294341295957565 + }, + "name_replaced": false, + "performance": { + 
"file_detection_time": "2025-12-23T09:33:01.105891485Z", + "first_section_created": "2025-12-23T09:33:01.106193297Z", + "last_section_published": "2025-12-23T09:33:01.106461909Z", + "all_results_received": "2025-12-23T09:33:01.217877512Z", + "output_generated": "2025-12-23T09:33:01.218113022Z", + "total_processing_time_ms": 112, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 111, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:01.106193297Z", + "publish_time": "2025-12-23T09:33:01.106398706Z", + "first_worker_start": "2025-12-23T09:33:01.106903427Z", + "last_worker_end": "2025-12-23T09:33:01.206969Z", + "total_journey_time_ms": 100, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:01.106929028Z", + "start_time": "2025-12-23T09:33:01.107035232Z", + "end_time": "2025-12-23T09:33:01.107136936Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:01.107162Z", + "start_time": "2025-12-23T09:33:01.107295Z", + "end_time": "2025-12-23T09:33:01.206969Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 99 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:01.106893726Z", + "start_time": "2025-12-23T09:33:01.106955929Z", + "end_time": "2025-12-23T09:33:01.107055933Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:01.106813623Z", + "start_time": "2025-12-23T09:33:01.106903427Z", + "end_time": "2025-12-23T09:33:01.10697273Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:01.106429907Z", + "publish_time": "2025-12-23T09:33:01.106461909Z", + "first_worker_start": "2025-12-23T09:33:01.106908127Z", + "last_worker_end": 
"2025-12-23T09:33:01.216948Z", + "total_journey_time_ms": 110, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:01.107006831Z", + "start_time": "2025-12-23T09:33:01.107045133Z", + "end_time": "2025-12-23T09:33:01.107054333Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:01.107234Z", + "start_time": "2025-12-23T09:33:01.107352Z", + "end_time": "2025-12-23T09:33:01.216948Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 109 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:01.106936628Z", + "start_time": "2025-12-23T09:33:01.106964429Z", + "end_time": "2025-12-23T09:33:01.10697933Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:01.106856325Z", + "start_time": "2025-12-23T09:33:01.106908127Z", + "end_time": "2025-12-23T09:33:01.106913927Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 208, + "min_processing_ms": 99, + "max_processing_ms": 109, + "avg_processing_ms": 104, + "median_processing_ms": 109, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2664, + "slowest_section_id": 1, + "slowest_section_time_ms": 110 + } +} diff --git a/data/output/001e8bda2f7ab73bf81314c1639a97dae2751703.json b/data/output/001e8bda2f7ab73bf81314c1639a97dae2751703.json new file mode 100644 index 0000000..6b4c0b5 --- /dev/null +++ b/data/output/001e8bda2f7ab73bf81314c1639a97dae2751703.json @@ -0,0 +1,492 @@ +{ + "file_name": "001e8bda2f7ab73bf81314c1639a97dae2751703.txt", + "total_words": 1229, + "top_n_words": [ + { + "word": "the", + "count": 58 + }, + { + "word": "to", + "count": 42 + }, + { + "word": "and", + "count": 31 + }, + { + "word": "a", + "count": 26 + }, + { + "word": "zimmerman", + "count": 26 + }, + { + "word": "is", + "count": 25 + }, + { + "word": "in", + "count": 23 + }, + { + "word": "that", + "count": 23 + }, + { + "word": "of", + "count": 22 + }, + { + "word": "martin", + "count": 20 + } + ], + "sorted_sentences": [ + { + "text": "He was unarmed.", + "length": 15 + }, + { + "text": "Let them decide.", + "length": 16 + }, + { + "text": "And he is right.", + "length": 16 + }, + { + "text": "Pray for Trayvon.", + "length": 17 + }, + { + "text": "Pray for his family.", + "length": 20 + }, + { + "text": "Listen to their words.", + "length": 22 + }, + { + "text": "Former Florida State Rep.", + "length": 25 + }, + { + "text": "The screams for help stop.", + "length": 26 + }, + { + "text": "But there is an exception.", + "length": 26 + }, + { + "text": "I believe in our jury system.", + "length": 29 + }, + { + "text": "Remember Joran Van Der Sloot.", + "length": 29 + }, + { + "text": "to prevent death or great bodily harm.", + "length": 38 + }, + { + "text": "Martin was 17 years old, Zimmerman, 28.", + "length": 39 + }, + { + "text": "Our victims always have a tough plight.", + 
"length": 39 + }, + { + "text": "Martin is found dead, laying on his stomach.", + "length": 44 + }, + { + "text": "Martin asks Zimmerman why he is following him.", + "length": 46 + }, + { + "text": "But her career hasn't been without controversy.", + "length": 47 + }, + { + "text": "Dee Dee believes she hears Martin being tackled.", + "length": 48 + }, + { + "text": "Dee Dee hears someone ask Martin why he is there.", + "length": 49 + }, + { + "text": "And the answer to that question is far from clear.", + "length": 50 + }, + { + "text": "She tells him to run, and he agrees to walk quickly.", + "length": 52 + }, + { + "text": "In short, they said that their client had gone rogue.", + "length": 53 + }, + { + "text": "Zimmerman tells the dispatcher he is following Martin.", + "length": 54 + }, + { + "text": "And attorneys withdraw from cases around the country daily.", + "length": 59 + }, + { + "text": "But the question remains, will George Zimmerman be charged?", + "length": 59 + }, + { + "text": "Three witnesses saw Zimmerman straddling Martin in the grass.", + "length": 61 + }, + { + "text": "The dispatcher tells Zimmerman \"we don't need you to do that.", + "length": 61 + }, + { + "text": "Clients fire attorneys every day, for no reason or any reason.", + "length": 62 + }, + { + "text": "Rogue clients that are potential defendants spook prosecutors.", + "length": 62 + }, + { + "text": "It seems the Sanford Police certainly believed Zimmerman's claims.", + "length": 66 + }, + { + "text": "Corey needs to be able to prove her case beyond a reasonable doubt.", + "length": 67 + }, + { + "text": "Zimmerman sees Martin, deems him \"suspicious\" and calls the police.", + "length": 67 + }, + { + "text": "It can be a nightmare to try to locate and arrest a fleeing defendant.", + "length": 70 + }, + { + "text": "Some say this is a direct result of her aggressive prosecutorial bent.", + "length": 70 + }, + { + "text": "The opinions expressed in this commentary are solely 
those of Sunny Hostin.", + "length": 75 + }, + { + "text": "And I look forward to meeting with them to try to help them on this journey.", + "length": 76 + }, + { + "text": "And that is what this case ultimately boils down to -- who started the fight.", + "length": 77 + }, + { + "text": "Zimmerman says that he returns to his parked SUV and is attacked suddenly by Martin.", + "length": 84 + }, + { + "text": "Manslaughter would not be difficult to prove but for Florida's \"stand your ground\" law.", + "length": 87 + }, + { + "text": "The Trayvon Martin family was also pleased that Corey would make the charging decision.", + "length": 87 + }, + { + "text": "\" If she files charges against Zimmerman, it would be wise not to \"overcharge\" the case.", + "length": 88 + }, + { + "text": "A tenet of our legal system is that when there is conflicting evidence, let a jury decide.", + "length": 90 + }, + { + "text": "Martin left the home of his father's fiancee on February 26 to buy skittles and an ice tea.", + "length": 91 + }, + { + "text": "The incident happens 70 yards from the home Martin was walking to, not near Zimmerman's SUV.", + "length": 92 + }, + { + "text": "The police report describes Martin as 6 feet tall and 160 pounds and lists Zimmerman as 5-foot-9.", + "length": 97 + }, + { + "text": "Zimmerman is bleeding from his nose and the back of his head and has stains on the back of his jacket.", + "length": 102 + }, + { + "text": "Zimmerman left his home to go to Target and was carrying a concealed weapon for which he had a permit.", + "length": 102 + }, + { + "text": "\" Many suspect that the announcement that Zimmerman had gone rogue forced the special prosecutor's hand.", + "length": 104 + }, + { + "text": "public defenders,\" and \"We asked for prayers for our two-year-old victim, David, and for Cristian Fernandez.", + "length": 108 + }, + { + "text": "\" Martin notices Zimmerman is following him and tells his girlfriend, Dee Dee, with whom he is on the 
phone.", + "length": 108 + }, + { + "text": "To prove manslaughter in Florida, Corey's team would have to prove that Zimmerman's acts caused Martin's death.", + "length": 111 + }, + { + "text": "If Zimmerman was the initial aggressor, he cannot avail himself of the protection of the \"stand your ground\" law.", + "length": 113 + }, + { + "text": "During her 25 years as an assistant state attorney, Corey tried hundreds of cases, including more than 50 homicides.", + "length": 116 + }, + { + "text": "Corey, a devout Episcopalian, references her faith in discussing her cases, which some would say is a no-no for a prosecutor.", + "length": 125 + }, + { + "text": "By all accounts though, Angela Corey is a seasoned career prosecutor who doesn't bend to public opinion or political pressure.", + "length": 126 + }, + { + "text": "\" In discussing the investigation into the shooting death of Martin she said, \"What we are asking people to do is take a step back.", + "length": 131 + }, + { + "text": "But it is rarely done so publicly and with so much information divulged about the inner workings of the lawyer-client relationship.", + "length": 131 + }, + { + "text": "I believe these are wonderful people who are asking for a peaceful approach to this case, while still demanding the answers they deserve.", + "length": 137 + }, + { + "text": "Zimmerman isn't tested for drug or alcohol consumption and is allowed to leave the police station with the clothes he was wearing that night.", + "length": 141 + }, + { + "text": "Dennis Baxley, the co-sponsor of the law, told me by phone that the law doesn't apply to Zimmerman if he pursued Martin and was the initial aggressor.", + "length": 150 + }, + { + "text": "Witnesses say they heard angry words, heard someone crying for help (many explain it sounded like the voice of a younger person) and then a single gunshot.", + "length": 155 + }, + { + "text": "So even if Zimmerman killed Martin, he was justified in doing so if he believed he 
was in danger of being killed himself or of suffering great bodily harm.", + "length": 155 + }, + { + "text": "Zimmerman's attorneys termed as \"courageous\" her decision not to present evidence to the grand jury that the original prosecutor, Norman Wolfinger, scheduled to convene on April 10.", + "length": 181 + }, + { + "text": "During her three-year-plus as state attorney of the 4th Judicial Circuit, Jacksonville's Duval County jail has seen an increase in the population, despite a drop in crime in the city.", + "length": 183 + }, + { + "text": "The attorneys also said that they are concerned with Zimmerman's emotional and physical well-being and even suggested that he may be suffering from PTSD (post traumatic stress disorder).", + "length": 186 + }, + { + "text": "New York (CNN) -- Northeast Florida State Attorney Angela Corey has made it clear that she alone will decide whether George Zimmerman will be charged in the shooting death of Trayvon Martin.", + "length": 190 + }, + { + "text": "against himself, unless such force is so great that the person reasonably believes that he is in imminent danger of death or great bodily harm and that he has exhausted every reasonable means to escape.", + "length": 202 + }, + { + "text": "They said they had lost contact with their client over the previous two days and revealed Zimmerman's unusual behavior -- including phone calls to Fox News host Sean Hannity and the special prosecutor, Corey.", + "length": 208 + }, + { + "text": "Recently she came under intense fire for charging 12-year-old Cristian Fernandez as an adult in the killing of his 2-year-old brother, making Christian the youngest person in Florida ever to be charged as an adult.", + "length": 214 + }, + { + "text": "Tuesday, in a bizarre development, George Zimmerman's attorneys, Hal Uhrig and Craig Sonner, during a news conference held in front of the Seminole County Courthouse, announced that they had withdrawn from his representation.", + "length": 225 + }, 
+ { + "text": "Not surprisingly, within hours of the now-infamous withdrawal, Corey issued a statement saying she would be holding her own news conference within 72 hours \"to release new information regarding the Trayvon Martin shooting death investigation.", + "length": 242 + }, + { + "text": "In a written statement she provided in response to her detractors about the Fernandez case, Corey defended her decision to charge Fernandez as an adult by stating, \"We are blessed in the 4th Circuit to have a great working relationship with ...", + "length": 244 + }, + { + "text": "Florida's law states that a person who is not engaged in an unlawful activity and who is attacked in any place where he or she has a right to be has no duty to retreat and has the right to stand his or her ground and meet force with force, including deadly force ...", + "length": 266 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.606517881155014 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:01.607216297Z", + "first_section_created": "2025-12-23T09:33:01.607568112Z", + "last_section_published": "2025-12-23T09:33:01.608099734Z", + "all_results_received": "2025-12-23T09:33:01.729764861Z", + "output_generated": "2025-12-23T09:33:01.730017271Z", + "total_processing_time_ms": 122, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 121, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:01.607568112Z", + "publish_time": "2025-12-23T09:33:01.607956428Z", + "first_worker_start": "2025-12-23T09:33:01.608272541Z", + "last_worker_end": "2025-12-23T09:33:01.710618Z", + "total_journey_time_ms": 103, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:01.608470349Z", + "start_time": "2025-12-23T09:33:01.609998412Z", + "end_time": "2025-12-23T09:33:01.610523534Z", + "queue_wait_time_ms": 2, + 
"processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:01.608852Z", + "start_time": "2025-12-23T09:33:01.609041Z", + "end_time": "2025-12-23T09:33:01.710618Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 101 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:01.608401846Z", + "start_time": "2025-12-23T09:33:01.60847845Z", + "end_time": "2025-12-23T09:33:01.608630456Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:01.608191438Z", + "start_time": "2025-12-23T09:33:01.608272541Z", + "end_time": "2025-12-23T09:33:01.608331044Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:01.60800173Z", + "publish_time": "2025-12-23T09:33:01.608099734Z", + "first_worker_start": "2025-12-23T09:33:01.608545252Z", + "last_worker_end": "2025-12-23T09:33:01.728813Z", + "total_journey_time_ms": 120, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:01.60849585Z", + "start_time": "2025-12-23T09:33:01.608545252Z", + "end_time": "2025-12-23T09:33:01.608598655Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:01.608809Z", + "start_time": "2025-12-23T09:33:01.608952Z", + "end_time": "2025-12-23T09:33:01.728813Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 119 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:01.608503251Z", + "start_time": "2025-12-23T09:33:01.608570953Z", + "end_time": "2025-12-23T09:33:01.609193379Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:01.608686658Z", + "start_time": "2025-12-23T09:33:01.60872876Z", + "end_time": 
"2025-12-23T09:33:01.608768862Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 220, + "min_processing_ms": 101, + "max_processing_ms": 119, + "avg_processing_ms": 110, + "median_processing_ms": 119, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3548, + "slowest_section_id": 1, + "slowest_section_time_ms": 120 + } +} diff --git a/data/output/001ee59c375363263821474d40e4386ab91d5145.json b/data/output/001ee59c375363263821474d40e4386ab91d5145.json new file mode 100644 index 0000000..d6da475 --- /dev/null +++ b/data/output/001ee59c375363263821474d40e4386ab91d5145.json @@ -0,0 +1,198 @@ +{ + "file_name": "001ee59c375363263821474d40e4386ab91d5145.txt", + "total_words": 228, + "top_n_words": [ + { + "word": "the", + "count": 17 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "harvard", + "count": 6 + }, + { + "word": "of", + "count": 6 + }, + { + "word": "policy", + "count": 5 + }, + { + "word": "that", + "count": 5 + }, + { + "word": "between", + 
"count": 4 + }, + { + "word": "faculty", + "count": 4 + }, + { + "word": "s", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "\" The action comes nearly a year after the U.", + "length": 45 + }, + { + "text": "The previous policy only did so between professors and the students they taught.", + "length": 80 + }, + { + "text": "therefore, the committee revised the policy to include a clear prohibition to better accord with these expectations.", + "length": 116 + }, + { + "text": "\" The new policy is the result of \"a formal process to review Harvard University's Title IX policy,\" the school said.", + "length": 117 + }, + { + "text": "Specifically, the school adopted a new policy this week that prohibits romantic relationships between undergraduates and professors.", + "length": 132 + }, + { + "text": "(CNN)If that car parked in Harvard Yard is a rockin', school officials may soon come a knockin', because hanky-panky between students and faculty at the elite university has officially been banned.", + "length": 197 + }, + { + "text": "Department of Education announced it was investigating 55 colleges and universities, including Harvard, for violations pertaining to Title IX, the federal law prohibiting sex discrimination on college campuses.", + "length": 210 + }, + { + "text": "Harvard released a statement saying a specially appointed committee \"determined that the existing language on relationships of unequal status did not explicitly reflect the faculty's expectations of what constituted an appropriate relationship between undergraduate students and faculty members ...", + "length": 298 + }, + { + "text": "Harvard responded at the time by saying it had appointed its first ever Title IX officer, and that the school's president \"recently announced the creation of a university-wide task force -- composed of faculty, students and staff -- that will recommend how we can better prevent sexual misconduct at Harvard.", 
+ "length": 308 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5401723980903625 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:02.108881024Z", + "first_section_created": "2025-12-23T09:33:02.109295441Z", + "last_section_published": "2025-12-23T09:33:02.109491449Z", + "all_results_received": "2025-12-23T09:33:02.169181915Z", + "output_generated": "2025-12-23T09:33:02.169257118Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:02.109295441Z", + "publish_time": "2025-12-23T09:33:02.109491449Z", + "first_worker_start": "2025-12-23T09:33:02.109971369Z", + "last_worker_end": "2025-12-23T09:33:02.167939Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:02.109973769Z", + "start_time": "2025-12-23T09:33:02.110026171Z", + "end_time": "2025-12-23T09:33:02.110076573Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:02.1102Z", + "start_time": "2025-12-23T09:33:02.110344Z", + "end_time": "2025-12-23T09:33:02.167939Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:02.109970069Z", + "start_time": "2025-12-23T09:33:02.110016171Z", + "end_time": "2025-12-23T09:33:02.110065473Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:02.109914367Z", + "start_time": "2025-12-23T09:33:02.109971369Z", + "end_time": "2025-12-23T09:33:02.10999487Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + 
"total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1511, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/001f0d2683b49f0d95e47647717a9cac4018ce77.json b/data/output/001f0d2683b49f0d95e47647717a9cac4018ce77.json new file mode 100644 index 0000000..bef115b --- /dev/null +++ b/data/output/001f0d2683b49f0d95e47647717a9cac4018ce77.json @@ -0,0 +1,230 @@ +{ + "file_name": "001f0d2683b49f0d95e47647717a9cac4018ce77.txt", + "total_words": 431, + "top_n_words": [ + { + "word": "the", + "count": 38 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "of", + "count": 13 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "bee", + "count": 12 + }, + { + "word": "beard", + "count": 10 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "competition", + "count": 8 + }, + { + "word": "is", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "Oh Bee-hive!", + "length": 12 + }, + { + "text": 
"Scroll down for video .", + "length": 23 + }, + { + "text": "Nuria Morrison of Mexico shows off her bee beard for the competition.", + "length": 69 + }, + { + "text": "The Bee Beard Champion is then chosen by judges, while the audience is also able to pick their Crowd Favourite.", + "length": 111 + }, + { + "text": "Honey, there's a problem: The annual competition at Clovermead Apiaries, in Ontario, is now in its tenth year .", + "length": 111 + }, + { + "text": "John Hiemstra (left) and Ken Vandendool show off their bee beards at the annual Clovermead Bee Beard competition .", + "length": 114 + }, + { + "text": "'It takes well over a thousand stings in one shot to kill a person, so if you get a sting or two it’s no big deal.", + "length": 116 + }, + { + "text": "The remarkably daring men and women have got Ontario all abuzz after taking part in the tenth annual Clovermead Bee Beard Competition.", + "length": 134 + }, + { + "text": "Each team consists not only of the beard model, but also a 'bee whisperer', who is able to groom the insects into the required shape .", + "length": 134 + }, + { + "text": "Each team consists not only of the beard model, but also a 'bee whisperer', who is able to groom the flying insects into the required shape.", + "length": 140 + }, + { + "text": "It's not exactly a competition for the faint-hearted, but let's hope there was no sting in the tale for any of these brave bee beard growers.", + "length": 141 + }, + { + "text": "To bee the best, you've got to beat the best: Ken Vandendool (left) gets his bee beard groomed by Amsey McEown during the annual competition .", + "length": 142 + }, + { + "text": "Clovermead Apiaries co-owner Chris Hiemstra told the In Transit blog of The New York Times: 'Bee beards are wonderful, but they’re really tickly.", + "length": 147 + }, + { + "text": "The contest sees four teams of competitors entice swarms of bees to the head and torso of a 'beard grower' before two winners are chosen by the audience 
and the official judges.", + "length": 177 + }, + { + "text": "Once in place, the beards of the competitors at Clovermead are weighed before each wearer goes on to strut their stuff and perform a little dance in front of the watching crowds .", + "length": 179 + }, + { + "text": "At the Clovermead competition however, once in place, the beards are weighed before each wearer goes on to strut their stuff and perform a little dance in front of the watching crowds.", + "length": 184 + }, + { + "text": "' The competition at Clovermead sees $2,500 donated to the champion's charity of choice, while $2,000 is given to the Crowd Favourite's charity and $500 is offered to charities chosen by the remaining two competitors.", + "length": 217 + }, + { + "text": "Beebearding is thought to date back to the 1700s when an English beekeeper discovered he could create a beard of bees by tying the queen to a thread around his neck and would parade through the streets wearing the unusual costume.", + "length": 230 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4772247076034546 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:02.610253138Z", + "first_section_created": "2025-12-23T09:33:02.610557951Z", + "last_section_published": "2025-12-23T09:33:02.61076896Z", + "all_results_received": "2025-12-23T09:33:02.679742709Z", + "output_generated": "2025-12-23T09:33:02.679905316Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:02.610557951Z", + "publish_time": "2025-12-23T09:33:02.61076896Z", + "first_worker_start": "2025-12-23T09:33:02.61125398Z", + "last_worker_end": "2025-12-23T09:33:02.678846Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:02.611193677Z", + 
"start_time": "2025-12-23T09:33:02.61125398Z", + "end_time": "2025-12-23T09:33:02.611297282Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:02.611489Z", + "start_time": "2025-12-23T09:33:02.611613Z", + "end_time": "2025-12-23T09:33:02.678846Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:02.611203478Z", + "start_time": "2025-12-23T09:33:02.61125408Z", + "end_time": "2025-12-23T09:33:02.611316182Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:02.61124718Z", + "start_time": "2025-12-23T09:33:02.611312482Z", + "end_time": "2025-12-23T09:33:02.611354184Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + 
"total_sections": 1, + "average_section_size": 2397, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/001f1b4c0f2efddaed544d59cbfa6f23d525a5af.json b/data/output/001f1b4c0f2efddaed544d59cbfa6f23d525a5af.json new file mode 100644 index 0000000..8a2d858 --- /dev/null +++ b/data/output/001f1b4c0f2efddaed544d59cbfa6f23d525a5af.json @@ -0,0 +1,254 @@ +{ + "file_name": "001f1b4c0f2efddaed544d59cbfa6f23d525a5af.txt", + "total_words": 460, + "top_n_words": [ + { + "word": "to", + "count": 20 + }, + { + "word": "music", + "count": 15 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "the", + "count": 11 + }, + { + "word": "driving", + "count": 9 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "by", + "count": 8 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "their", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Tom Gardner .", + "length": 13 + }, + { + "text": "07:57 EST, 26 August 2013 .", + "length": 27 + }, + { + "text": "07:59 EST, 26 August 2013 .", + "length": 27 + }, + { + "text": "Without listening to any music, 92 percent made errors.", + "length": 55 + }, + { + "text": "But when soothing tunes were played motoring behaviour actually improved, the survey discovered.", + "length": 96 + }, + { + "text": "These errors included speeding, tailgating, careless lane switching, passing vehicles and one-handed driving.", + "length": 109 + }, + { + "text": "Teenage drivers who listen to rock or rap music are more likely to drive badly or be involved in a crash, a study found.", + "length": 120 + }, + { + "text": "'Young drivers also tend to play this highly energetic, fast-paced music very loudly - approximately 120 to 130 decibels.", + "length": 121 + }, + { + "text": "Dangers: 
Young men are particularly susceptible to seeing their driving standards deteriorate if they listen to their preferred music.", + "length": 134 + }, + { + "text": "' The Israel National Road Safety Authority funded the study which will be published in the October issue of Accident Analysis and Prevention.", + "length": 142 + }, + { + "text": "'Most drivers worldwide prefer to listen to music in a car and those between ages 16 to 30 choose driving to pop, rock, dance, hip-hop and rap,' Brodsky explains.", + "length": 162 + }, + { + "text": "Men in particular made more frequent and serious mistakes listening to their favourite tunes than their less aggressive, female counterparts, the researchers noted.", + "length": 164 + }, + { + "text": "However, when driving with an alternative music background designed by Brodsky and Israeli music composer Micha Kisner, deficient driving behaviors decreased by 20 percent.", + "length": 172 + }, + { + "text": "When the teen drivers listened to their preferred music, virtually all (98 percent) demonstrated an average of three deficient driving behaviors in at least one of the trips.", + "length": 174 + }, + { + "text": "' 'Drivers in general are not aware that as they get drawn-in by a song, they move from an extra-personal space involving driving tasks, to a more personal space of active music listening.", + "length": 188 + }, + { + "text": "Young motorists made a greater number of errors and miscalculations while driving with their aggressive, fast-paced and loud music on, according to research from Ben-Gurion University of the Negev in Israel.", + "length": 207 + }, + { + "text": "(Picture posed by model) Nearly a third of those (32 percent) required a a sudden verbal warning or command for action, and 20 percent needed an assisted steering or braking maneuver to prevent an imminent accident.", + "length": 215 + }, + { + "text": "Each driver took six challenging 40-minute trips; two with music from their own playlists; two with 
background music designed to increase driver safety (easy listening, soft rock, light jazz), and two additional trips without any music.", + "length": 236 + }, + { + "text": "Risk: Teens who listen to loud, aggressive music while behind the wheel are more likely to be involved in a crash or drive badly, a study found (File photo) The BGU study evaluated 85 young novice drivers accompanied by a researcher/driving instructor.", + "length": 252 + }, + { + "text": "The study was conducted by BGU Director of Music Science Research Warren Brodsky and researcher Zack Slor to assess distraction by measuring driver deficiencies (miscalculation, inaccuracy, aggressiveness, and violations) as well as decreased vehicle performance.", + "length": 263 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4956147074699402 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:03.111567651Z", + "first_section_created": "2025-12-23T09:33:03.111909865Z", + "last_section_published": "2025-12-23T09:33:03.112104773Z", + "all_results_received": "2025-12-23T09:33:03.179794969Z", + "output_generated": "2025-12-23T09:33:03.179964076Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:03.111909865Z", + "publish_time": "2025-12-23T09:33:03.112104773Z", + "first_worker_start": "2025-12-23T09:33:03.112585993Z", + "last_worker_end": "2025-12-23T09:33:03.178878Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:03.112635095Z", + "start_time": "2025-12-23T09:33:03.112702397Z", + "end_time": "2025-12-23T09:33:03.1127689Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:03.112816Z", + "start_time": 
"2025-12-23T09:33:03.112938Z", + "end_time": "2025-12-23T09:33:03.178878Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:03.112579092Z", + "start_time": "2025-12-23T09:33:03.112652395Z", + "end_time": "2025-12-23T09:33:03.112723098Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:03.11251349Z", + "start_time": "2025-12-23T09:33:03.112585993Z", + "end_time": "2025-12-23T09:33:03.112611094Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2927, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/001f2856c2bca7de7918eb155b2e1bc8aa0d8695.json b/data/output/001f2856c2bca7de7918eb155b2e1bc8aa0d8695.json new file mode 100644 
index 0000000..a187507 --- /dev/null +++ b/data/output/001f2856c2bca7de7918eb155b2e1bc8aa0d8695.json @@ -0,0 +1,254 @@ +{ + "file_name": "001f2856c2bca7de7918eb155b2e1bc8aa0d8695.txt", + "total_words": 508, + "top_n_words": [ + { + "word": "the", + "count": 34 + }, + { + "word": "in", + "count": 12 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "s", + "count": 11 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "on", + "count": 9 + }, + { + "word": "to", + "count": 9 + }, + { + "word": "festival", + "count": 7 + }, + { + "word": "for", + "count": 7 + }, + { + "word": "from", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "Palio di Siena, Italy .", + "length": 23 + }, + { + "text": "White Turf, Switzerland .", + "length": 25 + }, + { + "text": "Pasola Festival, Indonesia .", + "length": 28 + }, + { + "text": "Luminarias Festival, Spain .", + "length": 28 + }, + { + "text": "Soma-Nomaoi Festival, Japan .", + "length": 29 + }, + { + "text": "Watch: From camel racing to prized jockey .", + "length": 43 + }, + { + "text": "It is believed that every drop of blood spilled will bring a good harvest.", + "length": 74 + }, + { + "text": "(CNN) -- Each summer, more than 50,000 people pour into a cobblestone square in Tuscany for a gut-wrenching 90-seconds.", + "length": 119 + }, + { + "text": "If you like your ancient festivals a little less violent, there's the 1,000-year-old Soma-Nomaoi wild horse chase in central Japan.", + "length": 131 + }, + { + "text": "The glitz and glam of snow-capped St Moritz couldn't be further from the spear-wielding horsemen of Indonesia's annual Pasola Festival.", + "length": 135 + }, + { + "text": "Dressed in extravagant armor, helmets, and carrying swords, the fantastical warriors appear to have stepped straight out from the 10th Century.", + "length": 143 + }, + { + "text": "Now as the dust settles on this year's colorful Palio di Siena, CNN takes a look at five weird and wonderful horse festivals from across 
the world.", + "length": 147 + }, + { + "text": "The controversial tradition, which has been criticized by animal welfare groups, dates back 500 years and is held on the eve of Saint Anthony's Day.", + "length": 148 + }, + { + "text": "From fiery Spain to snowy Switzerland -- there's not a bonfire in sight at the glitzy White Turf racing carnival, held on the frozen Lake St Moritz.", + "length": 148 + }, + { + "text": "The three-day festival sees samurai horsemen compete in different challenges -- from racing over a one kilometer track to battling over sacred flags.", + "length": 149 + }, + { + "text": "Fire and horses may seem like an unlikely combination, but that's exactly the dramatic scene which takes place in a small town in central Spain each year.", + "length": 154 + }, + { + "text": "That's all it takes for the flamboyantly dressed jockeys of Italy's legendary horse race -- Palio di Siena -- to race bareback around the medieval square.", + "length": 154 + }, + { + "text": "Horse are ridden over blazing tree branches as part of the Luminaries Festival in San Bartolome de Pinares, in an effort to purify and protect the animals.", + "length": 155 + }, + { + "text": "Dating back to the 17th century, each rider represents their local neighborhood, competing not just for the coveted victory banner -- but good luck for the coming year.", + "length": 168 + }, + { + "text": "The remarkable competition includes skijoring, where horses thunder around the icy track while their riders hold on for dear life to a harness at the back, trailing behind on skis.", + "length": 180 + }, + { + "text": "In such extreme weather -- around -20C -- fur coats are the order of the day for the champagne-sipping spectators who are perhaps better known for their luxury lifestyles than racing tips.", + "length": 188 + }, + { + "text": "On two days each year, the pretty town of Siena in northern Tuscany is transformed into an elaborate medieval race track, with 10 riders careering around 
the iconic city square three times.", + "length": 189 + }, + { + "text": "The fierce festival -- held on the island of Sumba -- sees two teams go head-to-head on elaborately decorated horses, throwing blunt spears at each other as part of an ancient ritual battle.", + "length": 190 + }, + { + "text": "It's been called the \"most dangerous horse race in the world\" -- about as far away as you can get from the genteel green lawns of Britain's Royal Ascot or the multimillion prize money bestowed on America's Kentucky Derby.", + "length": 221 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5812581777572632 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:03.612889363Z", + "first_section_created": "2025-12-23T09:33:03.613229377Z", + "last_section_published": "2025-12-23T09:33:03.613397984Z", + "all_results_received": "2025-12-23T09:33:03.682081822Z", + "output_generated": "2025-12-23T09:33:03.682253729Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:03.613229377Z", + "publish_time": "2025-12-23T09:33:03.613397984Z", + "first_worker_start": "2025-12-23T09:33:03.614000209Z", + "last_worker_end": "2025-12-23T09:33:03.681226Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:03.613947807Z", + "start_time": "2025-12-23T09:33:03.614000209Z", + "end_time": "2025-12-23T09:33:03.614049111Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:03.614135Z", + "start_time": "2025-12-23T09:33:03.614268Z", + "end_time": "2025-12-23T09:33:03.681226Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": 
"2025-12-23T09:33:03.613952807Z", + "start_time": "2025-12-23T09:33:03.614010409Z", + "end_time": "2025-12-23T09:33:03.614067312Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:03.613982208Z", + "start_time": "2025-12-23T09:33:03.614069212Z", + "end_time": "2025-12-23T09:33:03.614107513Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2992, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/001f7a255ce06e0bc3a8cffb94fc0019ba0f2f34.json b/data/output/001f7a255ce06e0bc3a8cffb94fc0019ba0f2f34.json new file mode 100644 index 0000000..f6f1151 --- /dev/null +++ b/data/output/001f7a255ce06e0bc3a8cffb94fc0019ba0f2f34.json @@ -0,0 +1,254 @@ +{ + "file_name": "001f7a255ce06e0bc3a8cffb94fc0019ba0f2f34.txt", + 
"total_words": 369, + "top_n_words": [ + { + "word": "the", + "count": 18 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "to", + "count": 9 + }, + { + "word": "diva", + "count": 8 + }, + { + "word": "had", + "count": 8 + }, + { + "word": "italvino", + "count": 8 + }, + { + "word": "was", + "count": 8 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "in", + "count": 7 + }, + { + "word": "couple", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "Italvino was the first to go.", + "length": 29 + }, + { + "text": "'At that moment she became more peaceful.", + "length": 41 + }, + { + "text": "' Just 40 minutes later, Diva had died too.", + "length": 43 + }, + { + "text": "'He considered their marriage one eternal date.", + "length": 47 + }, + { + "text": "'I've never seen anything like it,' Rafael said.", + "length": 48 + }, + { + "text": "'My grandfather went to [her room in the] hospital too.", + "length": 55 + }, + { + "text": "Italvino, 89, had been hospitalized with leukemia since last August.", + "length": 68 + }, + { + "text": "He even made sure the garden was filled with her favorite vegetables.", + "length": 69 + }, + { + "text": "Meanwhile, Diva was receiving chemotherapy for a tumor, Express UK reports.", + "length": 75 + }, + { + "text": "' They were never apart for a day and every morning Italvino cooked Diva breakfast.", + "length": 83 + }, + { + "text": "The couple first met at a dance in 1948, where Italvino was smitten at first sight.", + "length": 83 + }, + { + "text": "The had 10 children and 14 grandchildren, who remembered the couple as a legendary romance.", + "length": 91 + }, + { + "text": "The pair had 10 children and 14 grandchildren, who remembered the couple as a legendary romance .", + "length": 97 + }, + { + "text": "They had a private conversation together and after that they both seemed at peace with everything.", + "length": 98 + }, + { + "text": "Italvino and Diva Poss passed away in the same hospital room, 
side-by-side, only 40 minutes apart.", + "length": 98 + }, + { + "text": "Diva reportedly asked family to gather at the hospital sometime last week as her condition worsened.", + "length": 100 + }, + { + "text": "'She felt that her time was coming and asked to see her relatives,' grandson Rafael Max told reporters.", + "length": 103 + }, + { + "text": "After 65 years of marriage, one Brazilian couple's bond was so strong they stayed together even in death.", + "length": 105 + }, + { + "text": "Nurses moved Italvino into a vacant bed next to Diva then moved the beds together so the couple could hold hands .", + "length": 114 + }, + { + "text": "It was as if he had opened the doors for her to go to, as if he was arranging for them both to be together forever.", + "length": 115 + }, + { + "text": "'After he died my aunt whispered in my grandmother's ear that my grandfather had passed away in peace,' Rafael said.", + "length": 116 + }, + { + "text": "Italvino and Diva Poss passed away in the same hospital room, side-by-side, only 40 minutes apart after almost 70 years together .", + "length": 130 + }, + { + "text": "'[My grandfather] always said that they had been married for a year longer, because he counted it from the day they met,' Rafael said.", + "length": 134 + }, + { + "text": "' The Huffington Post reports that nurses moved Italvino into a vacant bed next to Diva then moved the beds together so the couple could hold hands.", + "length": 148 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.413717120885849 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:04.114149373Z", + "first_section_created": "2025-12-23T09:33:04.114473786Z", + "last_section_published": "2025-12-23T09:33:04.114700295Z", + "all_results_received": "2025-12-23T09:33:04.177618495Z", + "output_generated": "2025-12-23T09:33:04.1777445Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + 
"aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:04.114473786Z", + "publish_time": "2025-12-23T09:33:04.114700295Z", + "first_worker_start": "2025-12-23T09:33:04.115094912Z", + "last_worker_end": "2025-12-23T09:33:04.17662Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:04.115209716Z", + "start_time": "2025-12-23T09:33:04.11528812Z", + "end_time": "2025-12-23T09:33:04.115391924Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:04.115309Z", + "start_time": "2025-12-23T09:33:04.115441Z", + "end_time": "2025-12-23T09:33:04.17662Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:04.11504391Z", + "start_time": "2025-12-23T09:33:04.115094912Z", + "end_time": "2025-12-23T09:33:04.115135413Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:04.115105812Z", + "start_time": "2025-12-23T09:33:04.115160414Z", + "end_time": "2025-12-23T09:33:04.115180015Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2110, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/001f9c554f1a29169413d0d2f138212a14c6dcf1.json b/data/output/001f9c554f1a29169413d0d2f138212a14c6dcf1.json new file mode 100644 index 0000000..3652324 --- /dev/null +++ b/data/output/001f9c554f1a29169413d0d2f138212a14c6dcf1.json @@ -0,0 +1,202 @@ +{ + "file_name": "001f9c554f1a29169413d0d2f138212a14c6dcf1.txt", + "total_words": 189, + "top_n_words": [ + { + "word": "the", + "count": 11 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "libyan", + "count": 6 + }, + { + "word": "and", + "count": 5 + }, + { + "word": "foreign", + "count": 5 + }, + { + "word": "greek", + "count": 5 + }, + { + "word": "of", + "count": 5 + }, + { + "word": "is", + "count": 4 + }, + { + "word": "message", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "The nature of that message was not immediately known.", + "length": 53 + }, + { + "text": "Obeidi is the Libyan deputy foreign minister in charge of European affairs.", + "length": 75 + }, + { + "text": "\"We stressed -- reiterated -- the clear message of the international community.", + "length": 79 + }, + { + "text": "Journalists Houda Zaghdoudi and Elinda Labropoulou contributed to this report .", + "length": 79 + }, + { + "text": "Obeidi is expected to continue talks in Turkey and Malta, according to Droutsas.", + "length": 80 + }, + { + "text": "\"From what the Libyan envoy said, it is 
clear that the administration is looking for a solution,\" he added.", + "length": 107 + }, + { + "text": "The envoy crossed the Libyan border into Tunisia Sunday morning, and from there boarded a private Greek plane for Athens.", + "length": 121 + }, + { + "text": "Obeidi met with Greek Prime Minister George Papandreou Sunday night, according to Greek Foreign Minister Dimitris Droutsas.", + "length": 123 + }, + { + "text": "Libya asked Greece to allow a special envoy to travel there to communicate a message, Greek foreign ministry spokesman Grigoris Delavekouras said.", + "length": 146 + }, + { + "text": "(CNN) -- Libyan Deputy Foreign Minister Abdelati Obeidi flew to Greece Sunday to deliver a personal message from Libyan leader Moammar Gadhafi, a Greek foreign ministry official told CNN.", + "length": 187 + }, + { + "text": "One of full support and implementation for the decisions of the United Nations, immediate ceasefire and an end to violence, particularly against Libyan civilians,\" Droutsas said after the meeting.", + "length": 196 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4987765848636627 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:04.615467985Z", + "first_section_created": "2025-12-23T09:33:04.615882702Z", + "last_section_published": "2025-12-23T09:33:04.616115712Z", + "all_results_received": "2025-12-23T09:33:04.674740234Z", + "output_generated": "2025-12-23T09:33:04.674865739Z", + "total_processing_time_ms": 59, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:04.615882702Z", + "publish_time": "2025-12-23T09:33:04.616115712Z", + "first_worker_start": "2025-12-23T09:33:04.616634233Z", + "last_worker_end": "2025-12-23T09:33:04.673867Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + 
"receive_time": "2025-12-23T09:33:04.616575831Z", + "start_time": "2025-12-23T09:33:04.616634233Z", + "end_time": "2025-12-23T09:33:04.616661934Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:04.616815Z", + "start_time": "2025-12-23T09:33:04.616955Z", + "end_time": "2025-12-23T09:33:04.673867Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:04.616606032Z", + "start_time": "2025-12-23T09:33:04.616675035Z", + "end_time": "2025-12-23T09:33:04.616739937Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:04.616576531Z", + "start_time": "2025-12-23T09:33:04.616638533Z", + "end_time": "2025-12-23T09:33:04.616651534Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1256, + "slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/001fb37cf8a91dd358548d409d1ea09f4120ab6f.json b/data/output/001fb37cf8a91dd358548d409d1ea09f4120ab6f.json new file mode 100644 index 0000000..4a2fdc2 --- /dev/null +++ b/data/output/001fb37cf8a91dd358548d409d1ea09f4120ab6f.json @@ -0,0 +1,616 @@ +{ + "file_name": "001fb37cf8a91dd358548d409d1ea09f4120ab6f.txt", + "total_words": 1737, + "top_n_words": [ + { + "word": "the", + "count": 98 + }, + { + "word": "a", + "count": 68 + }, + { + "word": "of", + "count": 42 + }, + { + "word": "in", + "count": 36 + }, + { + "word": "to", + "count": 35 + }, + { + "word": "was", + "count": 34 + }, + { + "word": "he", + "count": 31 + }, + { + "word": "and", + "count": 29 + }, + { + "word": "on", + "count": 28 + }, + { + "word": "london", + "count": 23 + } + ], + "sorted_sentences": [ + { + "text": "11.", + "length": 3 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "'He .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "That .", + "length": 6 + }, + { + "text": "London.", + "length": 7 + }, + { + "text": "9 today.", + "length": 8 + }, + { + "text": "morning.", + "length": 8 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Sam Webb .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "The next day Venera .", + "length": 21 + }, + { + "text": "He died the following .", + "length": 23 + }, + { + "text": "She died on Wednesday .", + "length": 23 + }, + { + "text": "You just cant imagine a .", + "length": 25 + }, + { + "text": "It's still out there now.", + "length": 25 + }, + { + "text": "12:53 EST, 19 November 2013 .", + "length": 29 + }, + { + "text": "11:59 EST, 19 November 2013 .", + "length": 29 + }, + { + "text": "I looked out and saw the 
lorry.", + "length": 31 + }, + { + "text": "We will also target unsafe cyclists.", + "length": 36 + }, + { + "text": "was a unique, special, beautiful person.", + "length": 40 + }, + { + "text": "Cyclist: Venera Minakhmetova with her bicycle.", + "length": 46 + }, + { + "text": "30pm in Aldgate, near the Cycle Superhighway 2.", + "length": 47 + }, + { + "text": "She added: 'It has been a hard year for CoolTan.", + "length": 48 + }, + { + "text": "'We are gutted, it's such a shock and such a sad tragedy.", + "length": 57 + }, + { + "text": "' A police tent next to the lorry involved in the incident.", + "length": 59 + }, + { + "text": "She said: 'It's absolutely, one million per cent devastating.", + "length": 61 + }, + { + "text": "evening a 21 year old man from St John's Wood was hit by a bus at .", + "length": 67 + }, + { + "text": "She was very near to the junction and there was loads of screaming.", + "length": 67 + }, + { + "text": "the notorious Bow roundabout on the Cycle Superhighway at rush hour.", + "length": 68 + }, + { + "text": "more beautiful, selfless, human being, he was a big part of the team.", + "length": 69 + }, + { + "text": "crushed by a single-decker bus outside East Croydon station in south .", + "length": 70 + }, + { + "text": "Map: The sites of the six deaths in the capital in the last two weeks .", + "length": 71 + }, + { + "text": "following Tuesday, November 12th Roger de Klerk, 43, died after he was .", + "length": 72 + }, + { + "text": "Minakhmetova, a Russian entrepreneur died after being hit by a lorry at .", + "length": 73 + }, + { + "text": "'The lorry looks like it was turning left and he just went straight under.", + "length": 74 + }, + { + "text": "' Police officers remove the bike, which was been twisted in the accident .", + "length": 75 + }, + { + "text": "The death takes the number of to be killed in London to six in just 14 days.", + "length": 76 + }, + { + "text": "'I would not be against a prohibition or ban on cyclists 
wearing headphones.", + "length": 76 + }, + { + "text": "He said: 'I came past and saw people panicking around the side of the lorry.", + "length": 76 + }, + { + "text": "The paramedics were working on him as shocked people just stood and watched.", + "length": 76 + }, + { + "text": "'We didn't know if he was dead or alive, all we could see was his white chinos.", + "length": 79 + }, + { + "text": "' The mangled remains of the victim's bike trapped in the wheels of the lorry .", + "length": 79 + }, + { + "text": "The latest death means 14 cyclists have died on London's roads so far this year.", + "length": 80 + }, + { + "text": "Pictured are police officers standing next to the lorry involved in the incident .", + "length": 82 + }, + { + "text": "the back of the truck, people were screaming, there was nothing that they could do.", + "length": 83 + }, + { + "text": "The death brings the number of cyclists killed in London to six in just two weeks .", + "length": 83 + }, + { + "text": "An eyewitness described seeing 'white chinos' sticking out from underneath the cab.", + "length": 83 + }, + { + "text": "' A cyclist in his early 60s has died after a collision with a lorry in south London.", + "length": 85 + }, + { + "text": "He said: 'I'm very alarmed about cyclists wearing headphones,' he told BBC London 94.", + "length": 85 + }, + { + "text": "' What was left of his twisted road bike could be seen caught under the driver's cab.", + "length": 85 + }, + { + "text": "'There were sirens everywhere and someone said a cyclist was trapped under the lorry.", + "length": 85 + }, + { + "text": "'The ambulance seems to have disappeared, but there are still a load of police around.", + "length": 86 + }, + { + "text": "He was a keen volunteer who once donated at £3,000 prize to a charity he worked with .", + "length": 87 + }, + { + "text": "In 2012, there were 14 cyclists' deaths, while in 2011 a total of 16 cyclists were killed.", + "length": 90 + }, + { + "text": "'The 
paramedics came but he was crushed under the wheels, there wasn't much they could do.", + "length": 90 + }, + { + "text": "'When you think you're having a bad day and you see that on the way home it makes you think.", + "length": 92 + }, + { + "text": "Richard Muzira died after being knocked from his bicycle by a lorry in Camberwell last night.", + "length": 93 + }, + { + "text": "He was crushed by a tipper lorry as it moved off from stationary during the evening rush hour.", + "length": 94 + }, + { + "text": "Candle-lit: Cyclists showed their support for the victims of London's dangerous road network .", + "length": 94 + }, + { + "text": "Many at last week's vigil (right) accused officials of not doing enough to keep cyclists safe .", + "length": 95 + }, + { + "text": "'Cyclists are much more vulnerable because they go quite slow, but turning left is always risky.", + "length": 96 + }, + { + "text": "One witness, a man who declined to be named, saw the aftermath of the accident that killed Mr Murza.", + "length": 100 + }, + { + "text": "The decision was taken at a meeting between the Metropolitan Police Service and City Hall last night.", + "length": 101 + }, + { + "text": "'They've closed a lot of roads down and I think they're trying to get the cyclist out from underneath.", + "length": 102 + }, + { + "text": "'They will be stopping lorries and cars and where there is unsafe driving they will be taken off the road.", + "length": 106 + }, + { + "text": "'Call me illiberal but it makes me absolutely terrified to see them bowling along unable to hear the traffic.", + "length": 109 + }, + { + "text": "'We are a MAD organisation, which stands for Mental Health and Disability, and it has really affected us a lot.", + "length": 111 + }, + { + "text": "He said: 'There were a lot of buses queued up, people were looking the windows as the paramedics worked on him.", + "length": 111 + }, + { + "text": "'A lot of police vehicles were there and then they were putting up a 
tent so that people didn't see the cyclist.", + "length": 112 + }, + { + "text": "Another eyewitness, who gave his name as Dave, said: 'The first I heard of it was a woman making a wailing noise.", + "length": 113 + }, + { + "text": "Last week Boris Johnson (left) said cyclists needed to obey the laws of the road to as not to endanger their lives.", + "length": 115 + }, + { + "text": "His name was released as new measures were announced to combat the recent spate of cyclist deaths on the capital's roads.", + "length": 121 + }, + { + "text": "Three days later architect Francis Golding, 69, died after a crash with a coach in Holborn, central London on November 5th.", + "length": 123 + }, + { + "text": "James, who refused to give his surname, was travelling home to Dulwich on the bus when he saw the aftermath of the accident.", + "length": 124 + }, + { + "text": "On November 5th hospital porter Brian Holt, 62, died on Mile End Road in east London as he cycled along Cycle Superhighway 2.", + "length": 125 + }, + { + "text": "'Cycling is so dangerous in London, people just don't see you,' added Dave, who used to cycle but is now a regular motorcyclist.", + "length": 128 + }, + { + "text": "It is a short-term measure but the Mayor and the police feel we must act as a result of the recent spate of appalling accidents.", + "length": 128 + }, + { + "text": "She added: 'He regularly volunteered at the centre and won a £3,000 prize from the Bank of America in recognition for his work.", + "length": 128 + }, + { + "text": "A cyclist receives emergency medical treatment after being involved in an accident with a lorry in Camden, north London yesterday .", + "length": 131 + }, + { + "text": "The Parliamentary Advisory Council for Transport Safety (PACTS) has called on Mr Johnson to take 'urgent action' to make cyclists safer.", + "length": 136 + }, + { + "text": "' Mr Muzira, a Zimbabwean in his 60s who lived in London for decades and worked for the organisation for seven years, 
was a keen cyclist.", + "length": 137 + }, + { + "text": "' The Parliamentary Advisory Council for Transport Safety (PACTS) has called on Mr Johnson to take 'urgent action' to make cyclists safer .", + "length": 139 + }, + { + "text": "Vigil: Cyclists gathered at the Bow roundabout, where Venera Minakhmetova was killed, last week to mourn those who have died on London's roads .", + "length": 144 + }, + { + "text": "'He loved to cycle, he had his house burgled earlier this year and his bike was stolen so we made a collection to help him save up for a new one.", + "length": 145 + }, + { + "text": "'When anybody dies it affects a lot of people, but Richard's death will impact a big group of people who will struggle with it, they are too upset.", + "length": 147 + }, + { + "text": "Ms Baharier added: 'The Government will not make segregated cycle lanes so cars and lorries drive into people on bikes and kill them, it's that simple.", + "length": 151 + }, + { + "text": "The sixth cyclist killed in London in the last two weeks was an 'real hero' who worked as a volunteer for a mental health charity, colleagues said today.", + "length": 153 + }, + { + "text": "But the Mayor’s cycling commissioner, Andrew Gilligan, told the Evening Standard the actions are a must if the startling spate of deaths is to be halted.", + "length": 155 + }, + { + "text": "Specialist traffic officers will be out in force on every major street, especially notorious junctions like the Bow roundabout where a cyclist died last week.", + "length": 158 + }, + { + "text": "It really, really is, he was a real hero, he was fantastic, such wonderful human being, he was just one of the best and we had worked together for a long time.", + "length": 159 + }, + { + "text": "'He was really caring, really supportive, he was very helpful to everyone, he was very passionate, he had a great sense of humour and we shared a lot of laughs together.", + "length": 169 + }, + { + "text": "From Monday, they will be on the 
lookout for motorists using mobile phones, drivers stopping in 'bike boxes' at traffic lights and cyclists riding on the inside of HGVs.", + "length": 169 + }, + { + "text": "Speaking this afternoon Michelle Baharier, the chief executive at CoolTan Arts, said the vulnerable people the group works with had been hit hard by the loss of Mr Muzira.", + "length": 171 + }, + { + "text": "The mayor's comments came hours after a man died after being hit by a double-decker bus in Whitechapel, east London, the second casualty on London's roads in less than 24 hours.", + "length": 177 + }, + { + "text": "He said: 'This is a new zero-tolerance approach with a police officer on every main road in central London from Monday which is a huge escalation to the checks we are doing already.", + "length": 181 + }, + { + "text": "We had three people take their own lives and two die from self-neglect, we are all distraught, it's going to be very difficult for the people we work with to come to terms with this.", + "length": 182 + }, + { + "text": "Authorities pushed through the new policing measure despite previously saying real progress on safety would come with a £35 million scheme for segregated lanes to be built next year.", + "length": 185 + }, + { + "text": "' Collision: Roger William De Klerk, 43, an IT consultant and courier, of Forest Hill, south east London was killed in Croydon on Tuesday when he was hit by a bus while cycling on tramways .", + "length": 190 + }, + { + "text": "Last week the Mayor of London was accused of shirking his responsibility and insensitivity after saying cyclists should 'think of the laws of the road' after a spate of deaths involving cyclists in the capital.", + "length": 210 + }, + { + "text": "Richard Muzira, father of two and volunteer filmmaker at CoolTan Arts, died after being dragged under the wheels of a tipper truck at a busy junction on Camberwell Road in South London just after noon on Monday.", + "length": 211 + }, + { + "text": "' As the 
new safety measures were announced, London Mayor Boris Johnson said he thought cyclists using headphones while riding on London's streets are an 'absolute scourge' and he would not be against banning them.", + "length": 214 + }, + { + "text": "Speaking just hours after the fifth cyclist died as a result of a crash on London's roads in just nine days, Boris Johnson appeared to blame cyclists, saying their 'hasty, rash decisions' were 'endangering' their lives.", + "length": 219 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.562560185790062 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:05.117160813Z", + "first_section_created": "2025-12-23T09:33:05.118649274Z", + "last_section_published": "2025-12-23T09:33:05.119059091Z", + "all_results_received": "2025-12-23T09:33:05.218803912Z", + "output_generated": "2025-12-23T09:33:05.219041422Z", + "total_processing_time_ms": 101, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 99, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:05.118649274Z", + "publish_time": "2025-12-23T09:33:05.118873883Z", + "first_worker_start": "2025-12-23T09:33:05.119304301Z", + "last_worker_end": "2025-12-23T09:33:05.203523Z", + "total_journey_time_ms": 84, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:05.119539111Z", + "start_time": "2025-12-23T09:33:05.119604114Z", + "end_time": "2025-12-23T09:33:05.119708418Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:05.119728Z", + "start_time": "2025-12-23T09:33:05.119877Z", + "end_time": "2025-12-23T09:33:05.203523Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 83 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:05.119395905Z", + "start_time": 
"2025-12-23T09:33:05.119466308Z", + "end_time": "2025-12-23T09:33:05.119584013Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:05.119241499Z", + "start_time": "2025-12-23T09:33:05.119304301Z", + "end_time": "2025-12-23T09:33:05.119365304Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:05.118919885Z", + "publish_time": "2025-12-23T09:33:05.119059091Z", + "first_worker_start": "2025-12-23T09:33:05.119619114Z", + "last_worker_end": "2025-12-23T09:33:05.217893Z", + "total_journey_time_ms": 98, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:05.119606514Z", + "start_time": "2025-12-23T09:33:05.119646015Z", + "end_time": "2025-12-23T09:33:05.119721718Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:05.119703Z", + "start_time": "2025-12-23T09:33:05.119823Z", + "end_time": "2025-12-23T09:33:05.217893Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 98 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:05.119601113Z", + "start_time": "2025-12-23T09:33:05.119666116Z", + "end_time": "2025-12-23T09:33:05.119786221Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:05.119558712Z", + "start_time": "2025-12-23T09:33:05.119619114Z", + "end_time": "2025-12-23T09:33:05.119684517Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + 
}, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 181, + "min_processing_ms": 83, + "max_processing_ms": 98, + "avg_processing_ms": 90, + "median_processing_ms": 98, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4817, + "slowest_section_id": 1, + "slowest_section_time_ms": 98 + } +} diff --git a/data/output/001fb4ca3bd3a0c1cd91fdc813f0ebeeac678e76.json b/data/output/001fb4ca3bd3a0c1cd91fdc813f0ebeeac678e76.json new file mode 100644 index 0000000..d1e1323 --- /dev/null +++ b/data/output/001fb4ca3bd3a0c1cd91fdc813f0ebeeac678e76.json @@ -0,0 +1,266 @@ +{ + "file_name": "001fb4ca3bd3a0c1cd91fdc813f0ebeeac678e76.txt", + "total_words": 467, + "top_n_words": [ + { + "word": "the", + "count": 16 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "garcia", + "count": 14 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "s", + "count": 11 + }, + { + "word": "that", + "count": 11 + }, + { + "word": "he", + "count": 10 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "his", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "CNN's Catherine E.", + "length": 18 + }, + { + "text": "citizen and to practice law.", + "length": 28 + }, + { + "text": "Court: Undocumented immigrant can be lawyer .", + "length": 45 + }, + { + "text": "\"Oh, no, that's just too 
messed up,\" he said.", + "length": 45 + }, + { + "text": "It's the immigration system that's broken, he said.", + "length": 51 + }, + { + "text": "Shoichet and Tom Watkins contributed to this report.", + "length": 52 + }, + { + "text": "\"I wasn't smart and put all my eggs into one basket,\" he said.", + "length": 62 + }, + { + "text": "\" \"That, and I'm a little bit stubborn, anyway,\" Garcia added.", + "length": 62 + }, + { + "text": "He remained there until 1986, when he and his parents returned to Mexico.", + "length": 73 + }, + { + "text": "The visa still has not been granted, even though Garcia has lived in the state since 1994.", + "length": 90 + }, + { + "text": "\"I'm super excited to finally be able to fulfill one of my dreams,\" Garcia told CNN Friday.", + "length": 91 + }, + { + "text": "He was brought to the United States as a minor and has been in line for 19 years for a green card.", + "length": 98 + }, + { + "text": "Now that he has a law license, however, one thing that Garcia will not specialize in is immigration law.", + "length": 104 + }, + { + "text": "Garcia can be admitted to California's state bar and legally practice as a lawyer there, the court ruled.", + "length": 105 + }, + { + "text": "If anyone feels frustrated the the situation, they should address it with the federal government, Garcia said.", + "length": 110 + }, + { + "text": "Asked why he didn't choose a different career or pursue other opportunities, Garcia said law was his singular focus.", + "length": 116 + }, + { + "text": "They question how someone who is in the country without legal status can be licensed to uphold the law as an attorney.", + "length": 118 + }, + { + "text": "(CNN) -- Sergio Garcia, a 36-year-old undocumented immigrant in California, has held two lifelong dreams: to become a U.", + "length": 120 + }, + { + "text": "But the case raises many questions, particularly among those who have been critical of Garcia's efforts to practice law.", + "length": 120 + }, 
+ { + "text": "Garcia says that this an easy initial response to make but that looking at the details of his case, it is not so clear-cut.", + "length": 123 + }, + { + "text": "Garcia was born in Mexico in 1977 and taken to California by his parents when he was 17 months old, according to court documents.", + "length": 129 + }, + { + "text": "That year, Garcia's father filed an immigration visa petition on his son's behalf, which federal immigration officials accepted in 1995.", + "length": 136 + }, + { + "text": "\"This whole idea of being an attorney was the only idea I had going, so 20 years of working on that dream, I couldn't really afford to give up on it.", + "length": 149 + }, + { + "text": "He's been waiting 19 years for a visa still stuck in a backlog, but the California Supreme Court ensured this week that his second dream will become a reality.", + "length": 159 + }, + { + "text": "California's Supreme Court ruled Thursday (PDF) that no state law or public policy should stop Garcia or others like him from obtaining a law license in the state.", + "length": 163 + }, + { + "text": "Eight years later, at age 17, Garcia again returned to California with his parents and without documentation, though his father had obtained permanent resident status in the United States.", + "length": 188 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.3965359330177307 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:05.61982618Z", + "first_section_created": "2025-12-23T09:33:05.620157594Z", + "last_section_published": "2025-12-23T09:33:05.620327301Z", + "all_results_received": "2025-12-23T09:33:05.685538695Z", + "output_generated": "2025-12-23T09:33:05.685704702Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:05.620157594Z", + "publish_time": 
"2025-12-23T09:33:05.620327301Z", + "first_worker_start": "2025-12-23T09:33:05.620913425Z", + "last_worker_end": "2025-12-23T09:33:05.684589Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:05.620917226Z", + "start_time": "2025-12-23T09:33:05.620995629Z", + "end_time": "2025-12-23T09:33:05.621053731Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:05.621099Z", + "start_time": "2025-12-23T09:33:05.621237Z", + "end_time": "2025-12-23T09:33:05.684589Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:05.620856223Z", + "start_time": "2025-12-23T09:33:05.620913425Z", + "end_time": "2025-12-23T09:33:05.620970528Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:05.620921626Z", + "start_time": "2025-12-23T09:33:05.620991429Z", + "end_time": "2025-12-23T09:33:05.62103153Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2581, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/00206472b78ebae574c4d7869c816aa7d7f8a18e.json b/data/output/00206472b78ebae574c4d7869c816aa7d7f8a18e.json new file mode 100644 index 0000000..f06faa9 --- /dev/null +++ b/data/output/00206472b78ebae574c4d7869c816aa7d7f8a18e.json @@ -0,0 +1,242 @@ +{ + "file_name": "00206472b78ebae574c4d7869c816aa7d7f8a18e.txt", + "total_words": 524, + "top_n_words": [ + { + "word": "the", + "count": 41 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "as", + "count": 9 + }, + { + "word": "is", + "count": 9 + }, + { + "word": "wedding", + "count": 9 + }, + { + "word": "aharon", + "count": 8 + }, + { + "word": "jewish", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "Aharon is wished good luck my a family member as Rebecca takes her seat next to him .", + "length": 85 + }, + { + "text": "Groom Aharon is cheered on and toasted by male friends and relatives, while his bride celebrates separately .", + "length": 109 + }, + { + "text": "Stolen glance: Bride Rebecca Hanna is seen after her wedding ceremony as she prepares to join the women's side of the dance and feast .", + "length": 135 + }, + { + "text": "Jewish bride Rebecca Hanna and her groom Aharon Cruise’s wedding in the in Jerusalem follows the strict rules of the Haredi community.", + "length": 136 + }, + { + "text": "Nerves: Aharon Cruise turns around to see his 
young bride Rebecca approach from the women's side of the room, which is being separated by a veil .", + "length": 146 + }, + { + "text": "Male guests of the wedding, wearing traditional clothes and donning the characteristic ringlets, known as peyos, and covering their heads with hats .", + "length": 149 + }, + { + "text": "Keeping with the traditions: Only men can be seen dancing in this picture as the women have their own celebrations on the other side of the lace veil .", + "length": 151 + }, + { + "text": "Traditional union: Ultra-Orthodox Jewish bride Rebecca Hanna and her groom Aharon Cruise sit after their wedding in the Mea Shearim neighborhood in Jerusalem .", + "length": 159 + }, + { + "text": "Musical traditions: The male guests performs customary horah - a circle dance - during which Aharon is raised in the center on a chair put on an upside down table .", + "length": 165 + }, + { + "text": "Young ultra-Orthodox Jewish boys wearing traditional headgear smoke cigarettes during the celebrations in the strict Haredi community, of which Hasidic Judaism is part .", + "length": 169 + }, + { + "text": "Pronounced husband and wife: Ultra-Orthodox Jewish bride Rebecca Hanna giggles alongside her groom Aharon Cruise as they pose for a photo after their wedding ceremony .", + "length": 169 + }, + { + "text": "Modern society intervenes: In contrast, some of the younger guests changed their traditional hats for more modern headgear as the wedding celebrations went on into the night .", + "length": 175 + }, + { + "text": "Glimpse: The photographer catches the female side of the wedding celebration for a split second as the bride Rebecca lifts the fabric keeping the two sexes separate during the party .", + "length": 183 + }, + { + "text": "For any young couple getting married, the preparations involves a lot of nervous jitters and fidgeting, however this one also follows traditions which has been followed for centuries.", + "length": 183 + }, + { + "text": "Old 
meets new: Although it may seem strange and dangerous to modern eyes, it is perfectly normal for young boys to celebrate the same way as their fathers, including having a cigarette .", + "length": 186 + }, + { + "text": "Day of joy: Men, young and old, dance along with the groom Aharon Cruise on his wedding day, while the women celebrate separately, on the other side of the white veil seen in the far background .", + "length": 195 + }, + { + "text": "Secret lives: The women through white lace as Haredi traditions dictate that men and women are kept separate, some communities even arguing that married men and women should not be seen together .", + "length": 196 + }, + { + "text": "The know how to throw a party: The area of Mea Shearim, which means 100 gates, is one of the oldest Jewish neighbourhoods in Jerusalem, established in 1874, and has an overwhelmingly Haredi (ultra-orthodox Jewish) population .", + "length": 226 + }, + { + "text": "This wedding took place on Tuesday in the area of Mea Shearim, which means 100 gates, is one of the oldest Jewish neighbourhoods in Jerusalem, established in 1874, and has an overwhelmingly Haredi (ultra-orthodox Jewish) population.", + "length": 232 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.46754327416419983 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:06.121260897Z", + "first_section_created": "2025-12-23T09:33:06.122570752Z", + "last_section_published": "2025-12-23T09:33:06.12276316Z", + "all_results_received": "2025-12-23T09:33:06.185606856Z", + "output_generated": "2025-12-23T09:33:06.185775963Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:06.122570752Z", + "publish_time": "2025-12-23T09:33:06.12276316Z", + "first_worker_start": "2025-12-23T09:33:06.123319183Z", + "last_worker_end": 
"2025-12-23T09:33:06.183383Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:06.123301282Z", + "start_time": "2025-12-23T09:33:06.123386985Z", + "end_time": "2025-12-23T09:33:06.123458288Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:06.123539Z", + "start_time": "2025-12-23T09:33:06.123684Z", + "end_time": "2025-12-23T09:33:06.183383Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:06.12326378Z", + "start_time": "2025-12-23T09:33:06.123319183Z", + "end_time": "2025-12-23T09:33:06.123438187Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:06.123311682Z", + "start_time": "2025-12-23T09:33:06.123381585Z", + "end_time": "2025-12-23T09:33:06.123456388Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3194, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/0020ede07ee7ad1f6cf654c7dc678e7341d0c0e5.json b/data/output/0020ede07ee7ad1f6cf654c7dc678e7341d0c0e5.json new file mode 100644 index 0000000..6a21fd6 --- /dev/null +++ b/data/output/0020ede07ee7ad1f6cf654c7dc678e7341d0c0e5.json @@ -0,0 +1,420 @@ +{ + "file_name": "0020ede07ee7ad1f6cf654c7dc678e7341d0c0e5.txt", + "total_words": 1105, + "top_n_words": [ + { + "word": "the", + "count": 42 + }, + { + "word": "a", + "count": 37 + }, + { + "word": "of", + "count": 32 + }, + { + "word": "to", + "count": 28 + }, + { + "word": "and", + "count": 27 + }, + { + "word": "in", + "count": 23 + }, + { + "word": "she", + "count": 23 + }, + { + "word": "her", + "count": 20 + }, + { + "word": "s", + "count": 18 + }, + { + "word": "it", + "count": 17 + } + ], + "sorted_sentences": [ + { + "text": "65 in the rankings.", + "length": 19 + }, + { + "text": "1, for 12 weeks in total.", + "length": 25 + }, + { + "text": "You need to get to know someone.", + "length": 32 + }, + { + "text": "\"It's about confidence with Ana.", + "length": 32 + }, + { + "text": "It's also true of fitness trainers.", + "length": 35 + }, + { + "text": "(CNN) -- Rip it up and start again.", + "length": 35 + }, + { + "text": "It's not just coaches that have come and gone.", + "length": 46 + }, + { + "text": "\"I think she'll find it difficult,\" added Durie.", + "length": 48 + }, + { + "text": "Her sense of trust in herself is what she needs.", + "length": 48 + }, + { + "text": "But can she beat Petra Kvitova and Serena in a row?", + "length": 51 + }, + { + "text": "the ideal preparation for next week's Australian Open.", + "length": 54 + }, + { + "text": "1 Venus Williams to end 
a more than two-year title drought.", + "length": 59 + }, + { + "text": "\"Tennis is a big part of my life, but it's not my whole life.", + "length": 61 + }, + { + "text": "All the portents suggested great things were ahead of the Serbian.", + "length": 66 + }, + { + "text": "So definitely I want to achieve what I can on the court and then focus.", + "length": 71 + }, + { + "text": "\" Ivanovic's solitary grand slam win helped the baseliner become the world No.", + "length": 78 + }, + { + "text": "\"She's capable of big wins, she can certainly beat players like Serena Williams.", + "length": 80 + }, + { + "text": "\"A lot of players on tour jump around with coaches, I can never understand that.", + "length": 80 + }, + { + "text": "Family is a big part of my life and I want to have lots of kids of my own one day.", + "length": 82 + }, + { + "text": "As a 20-year-old, Ana Ivanovic claimed the French Open on Roland Garros' clay courts.", + "length": 85 + }, + { + "text": "Ivanovic, now 26, admitted that she too has started thinking about life after tennis.", + "length": 85 + }, + { + "text": "\"She can beat the top players, but to win a grand slam you have to win seven matches.", + "length": 85 + }, + { + "text": "\"I don't want to put a date to it because I think you feel it when the time is right.", + "length": 85 + }, + { + "text": "Wrist, shoulder, foot, abdominal and hip injuries all took their toll as Ivanovic fell to No.", + "length": 93 + }, + { + "text": "\"I have a new team with me since Wimbledon and it's a Serbian team for me for the first time.", + "length": 93 + }, + { + "text": "\"I'm really enjoying someone who speaks the same language and can understand you,\" Ivanovic said.", + "length": 97 + }, + { + "text": "\" While her playing fortunes might have fluctuated, Ivanovic's marketability has never been dented.", + "length": 99 + }, + { + "text": "A number of others have also helped her temporarily as part of the Adidas Player Development program.", + 
"length": 101 + }, + { + "text": "Kicking off the 2014 season in Auckland, New Zealand, she ground out a victory against fellow former No.", + "length": 104 + }, + { + "text": "\"I still feel like there is so much I can achieve and so many tournaments I can win,\" she told the Belgian.", + "length": 107 + }, + { + "text": "\"I know it's difficult to fit it all around it, but Asia at the end of the year really gets a lot of players.", + "length": 109 + }, + { + "text": "Such constant chopping and changing suggests a player stuck in a rut, desperately searching for a way out of it.", + "length": 112 + }, + { + "text": "Her latest campaign, though, does not look like being clouded by such distraction or, just as importantly, injury.", + "length": 114 + }, + { + "text": "Despite Ivanovic's obvious talent, Durie doubts whether her game has the consistency required to win a grand slam.", + "length": 114 + }, + { + "text": "Here was a tennis player with an impressive forehand and serve, with the added bonus of being incredibly marketable.", + "length": 116 + }, + { + "text": "It is arguable she has also endured something of an identity crisis, chopping and changing coaching teams along the way.", + "length": 120 + }, + { + "text": "Since that win in Paris in 2008, Ivanovic has suffered from big-match nerves, serving woes and a series of injury problems.", + "length": 123 + }, + { + "text": "\"I've been working really hard,\" Ivanovic told retired grand slam champion Kim Clijsters in an interview for CNN's Open Court show.", + "length": 131 + }, + { + "text": "They have all been part of her entourage since her split with British coach Nigel Sears in July, following a second-round exit at Wimbledon.", + "length": 140 + }, + { + "text": "Her career is littered with coaches who have come and gone as she has searched for a winning formula to challenge consistently for grand slams.", + "length": 143 + }, + { + "text": "\"We are also having more fun and a lot of laughs on the 
court as well to make it interesting, because the year gets very long,\" she said of her team.", + "length": 149 + }, + { + "text": "\" Clijsters retired for a second time in 2012, having won four grand slams, as she decided to focus on her family -- and has since had a second child.", + "length": 150 + }, + { + "text": "\"She's hooked up with someone she has trust in and she's finding herself,\" Jo Durie, who won mixed doubles at the Australian Open and Wimbledon, told CNN.", + "length": 154 + }, + { + "text": "Employing a coaching team made up of her compatriots could be key to Ivanovic performing consistently, according to a former grand slam champion turned coach.", + "length": 158 + }, + { + "text": "Since parting company with her early mentor Zoltan Kuharsky in 2006, she has employed David Taylor, Craig Kardon, Heinz Gunthardt, Antonio van Grichen and Sears.", + "length": 161 + }, + { + "text": "Her desire to follow up that 2008 French Open win has also led Ivanovic to ponder why she picked up a racket at the age of five in the first place -- for the enjoyment.", + "length": 168 + }, + { + "text": "Although she acknowledges that there have been improvements in the demands of the WTA Tour calendar, parts of the worldwide schedule are still difficult for the players.", + "length": 169 + }, + { + "text": "Her continuing search to help solve this problem and allow her to feel comfortable in her own skin has led her to appointing a support network who speak the same language.", + "length": 171 + }, + { + "text": "\" Ivanovic's new team includes coach and hitting partner Nemanja Kontic -- who represented Montenegro in the Davis Cup -- fitness coach Zlatko Novkovic and physio Branko Penic.", + "length": 176 + }, + { + "text": "\"Especially at the end of the year when from America we mostly go back to Europe for a week and then we go to Asia for quite a few weeks, so that's kind of tiring and hard,\" she said.", + "length": 183 + }, + { + "text": "Now ranked 14th, 
she has been outside of the top 10 since June 2009, partly explained by her inability to reach the final four of a grand slam since that 2008 win over Dinara Safina in Paris.", + "length": 191 + }, + { + "text": "But six years on, much like the characters Vladimir and Estragon in Samuel Beckett's play \"Waiting for Godot\" -- a drama about the passing of time -- the wait for a second grand slam shows no sign of ending.", + "length": 207 + }, + { + "text": "According to Forbes, she was the ninth highest-paid female athlete in 2013 with total earnings of $7 million -- brought in largely thanks to lucrative sponsorship deals including Adidas, Yonex, Juice Plux and Dubai Duty Free.", + "length": 225 + }, + { + "text": "In Kontic, Ivanovic may not have a wise professor on her hands as she did with Sears, yet the 32-year-old, ranked 1,635th in men's doubles, is able to offer her something that respect within the game cannot always buy -- a shared cultural identity.", + "length": 248 + }, + { + "text": "Ivanovic has had a number of high-profile boyfriends -- including Masters-winning golfer Adam Scott and fellow tennis player Fernando Verdasco -- but as Caroline Wozniacki has discovered, it is tough combining consistency on court with such a relationship.", + "length": 256 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.3623127043247223 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:06.623513823Z", + "first_section_created": "2025-12-23T09:33:06.62552061Z", + "last_section_published": "2025-12-23T09:33:06.626042533Z", + "all_results_received": "2025-12-23T09:33:06.71517383Z", + "output_generated": "2025-12-23T09:33:06.715389439Z", + "total_processing_time_ms": 91, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 89, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:06.62552061Z", + "publish_time": 
"2025-12-23T09:33:06.625832224Z", + "first_worker_start": "2025-12-23T09:33:06.626111936Z", + "last_worker_end": "2025-12-23T09:33:06.714222Z", + "total_journey_time_ms": 88, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:06.626100336Z", + "start_time": "2025-12-23T09:33:06.62619224Z", + "end_time": "2025-12-23T09:33:06.626277243Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:06.626526Z", + "start_time": "2025-12-23T09:33:06.626657Z", + "end_time": "2025-12-23T09:33:06.714222Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 87 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:06.626036133Z", + "start_time": "2025-12-23T09:33:06.626111936Z", + "end_time": "2025-12-23T09:33:06.626248442Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:06.626040933Z", + "start_time": "2025-12-23T09:33:06.626116036Z", + "end_time": "2025-12-23T09:33:06.626185039Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:06.625915028Z", + "publish_time": "2025-12-23T09:33:06.626042533Z", + "first_worker_start": "2025-12-23T09:33:06.626359647Z", + "last_worker_end": "2025-12-23T09:33:06.706406Z", + "total_journey_time_ms": 80, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:06.626506954Z", + "start_time": "2025-12-23T09:33:06.626546955Z", + "end_time": "2025-12-23T09:33:06.626573256Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:06.62667Z", + "start_time": "2025-12-23T09:33:06.626793Z", + "end_time": "2025-12-23T09:33:06.706406Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 79 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:06.626494553Z", + "start_time": "2025-12-23T09:33:06.626530455Z", + "end_time": "2025-12-23T09:33:06.626573956Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:06.626313845Z", + "start_time": "2025-12-23T09:33:06.626359647Z", + "end_time": "2025-12-23T09:33:06.626372648Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 166, + "min_processing_ms": 79, + "max_processing_ms": 87, + "avg_processing_ms": 83, + "median_processing_ms": 87, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3041, + "slowest_section_id": 0, + "slowest_section_time_ms": 88 + } +} diff --git a/data/output/00211175819295755ed12e89791b4d543442981a.json b/data/output/00211175819295755ed12e89791b4d543442981a.json new file mode 100644 index 0000000..8bbdb3d --- /dev/null +++ b/data/output/00211175819295755ed12e89791b4d543442981a.json @@ 
-0,0 +1,242 @@ +{ + "file_name": "00211175819295755ed12e89791b4d543442981a.txt", + "total_words": 463, + "top_n_words": [ + { + "word": "the", + "count": 35 + }, + { + "word": "to", + "count": 19 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "city", + "count": 9 + }, + { + "word": "s", + "count": 8 + }, + { + "word": "said", + "count": 8 + }, + { + "word": "hiring", + "count": 7 + }, + { + "word": "a", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "\" Bloomberg strongly disagreed with the decision.", + "length": 49 + }, + { + "text": "\"We're glad to see the judge properly addressing this issue.", + "length": 60 + }, + { + "text": "Litigation against the city's firefighter hiring practices began in 2007, when the U.", + "length": 85 + }, + { + "text": "Mark LaVorgna, a spokesman for the mayor, said the city intends to appeal the decision.", + "length": 87 + }, + { + "text": "\"We're very pleased to see this order and it's certainly long overdue,\" Washington said.", + "length": 88 + }, + { + "text": "Paul Washington, a representative of the Vulcan Society, one of the plaintiffs in the case, praised the decision.", + "length": 113 + }, + { + "text": "\"Four years of litigation and two adverse liability rulings later, the City still doesn't get it,\" Garaufis said.", + "length": 113 + }, + { + "text": "\"The City's senior leaders have routinely denied that they are responsible or doing anything to remedy nearly forty years of discrimination.", + "length": 140 + }, + { + "text": "Department of Justice filed a complaint alleging the Fire Department of New York's hiring exams negatively affected black and Hispanic applicants.", + "length": 146 + }, + { + "text": "New York (CNN) -- A federal judge has ordered that an official monitor be put in place to 
prevent discrimination in the hiring of New York City firefighters.", + "length": 157 + }, + { + "text": "Garaufis cited \"the clear evidence of disparate impact that Mayor (Michael) Bloomberg and his senior leadership chose to ignore was obvious to anyone else who looked.", + "length": 166 + }, + { + "text": "\" Bloomberg said 61,000 people, more than half of them minorities, applied to the fire department in the last recruiting campaign, \"shattering any previous record for minority applicants.", + "length": 187 + }, + { + "text": "\" The order requires the city to take remedial steps to fix discriminatory hiring practices and puts the court monitor in place for at least the next 10 years to make sure those steps are taken.", + "length": 194 + }, + { + "text": "\" Garaufis acknowledged that the city has improved its minority recruiting, but he said the subsequent hiring processes and \"discriminatory testing procedures\" have kept many of these minorities from actually being hired.", + "length": 221 + }, + { + "text": "\" \"Instead of facing hard facts and asking hard questions about the City's abysmal track record of hiring black and Hispanic firefighters, the Bloomberg Administration dug in and fought back,\" the judge said in his ruling.", + "length": 222 + }, + { + "text": "\"I think it's fair to say no previous administration has done more or been as successful in attracting the diversity to the FDNY than we have,\" the mayor told reporters Wednesday, \"and I couldn't feel more strongly about it.", + "length": 224 + }, + { + "text": "\" Details of the court-appointed monitoring, as well as logistics for a future fairness hearing in which third parties will be able to express their opinions, is scheduled for October 20, said Darius Charney, an attorney for the plaintiffs.", + "length": 240 + }, + { + "text": "District Judge Nicholas Garaufis said the city needs \"to comprehensively reassess its policies and practices, to analyze the evidence showing the 
effect of those policies and practices, and to rationally consider how they can be changed to achieve a firefighter hiring process that is -- in actual practice and effect -- fair and open to all.", + "length": 342 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5074040293693542 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:07.126800625Z", + "first_section_created": "2025-12-23T09:33:07.12714064Z", + "last_section_published": "2025-12-23T09:33:07.127325848Z", + "all_results_received": "2025-12-23T09:33:07.189218354Z", + "output_generated": "2025-12-23T09:33:07.18935446Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:07.12714064Z", + "publish_time": "2025-12-23T09:33:07.127325848Z", + "first_worker_start": "2025-12-23T09:33:07.127870172Z", + "last_worker_end": "2025-12-23T09:33:07.186566Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:07.127860071Z", + "start_time": "2025-12-23T09:33:07.127929975Z", + "end_time": "2025-12-23T09:33:07.127991977Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:07.127978Z", + "start_time": "2025-12-23T09:33:07.128121Z", + "end_time": "2025-12-23T09:33:07.186566Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:07.12782397Z", + "start_time": "2025-12-23T09:33:07.127870172Z", + "end_time": "2025-12-23T09:33:07.127938575Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:07.12781787Z", + "start_time": "2025-12-23T09:33:07.127884073Z", + 
"end_time": "2025-12-23T09:33:07.127905373Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2852, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git a/data/output/002124147bd10996f410d1a117ddf298018dbced.json b/data/output/002124147bd10996f410d1a117ddf298018dbced.json new file mode 100644 index 0000000..5d784eb --- /dev/null +++ b/data/output/002124147bd10996f410d1a117ddf298018dbced.json @@ -0,0 +1,512 @@ +{ + "file_name": "002124147bd10996f410d1a117ddf298018dbced.txt", + "total_words": 1151, + "top_n_words": [ + { + "word": "the", + "count": 71 + }, + { + "word": "and", + "count": 43 + }, + { + "word": "to", + "count": 43 + }, + { + "word": "of", + "count": 36 + }, + { + "word": "isis", + "count": 33 + }, + { + "word": "is", + "count": 21 + }, + { + "word": "it", + "count": 20 + }, + { + "word": "a", + "count": 
19 + }, + { + "word": "s", + "count": 17 + }, + { + "word": "in", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "The U.", + "length": 6 + }, + { + "text": "The U.", + "length": 6 + }, + { + "text": "\" The U.", + "length": 8 + }, + { + "text": "1\" of Islam.", + "length": 12 + }, + { + "text": "should seek a U.", + "length": 16 + }, + { + "text": "In Iraq alone, 1.", + "length": 17 + }, + { + "text": "should think again.", + "length": 19 + }, + { + "text": "At the same time, the U.", + "length": 24 + }, + { + "text": "But the group is a growing threat.", + "length": 34 + }, + { + "text": "and are prepared to do the unthinkable.", + "length": 39 + }, + { + "text": "government has crucial steps to take now.", + "length": 41 + }, + { + "text": "government and to the international community.", + "length": 46 + }, + { + "text": "They are virulently opposed to the West, to the U.", + "length": 50 + }, + { + "text": "The entire world is appalled by the brutal murder.", + "length": 50 + }, + { + "text": "That includes the overwhelming majority of Muslims.", + "length": 51 + }, + { + "text": "Countless Muslims have criticized and condemned them.", + "length": 53 + }, + { + "text": "If Osama bin Laden weren't dead, he would die of envy.", + "length": 54 + }, + { + "text": "And ISIS ideology is gaining support in the continent.", + "length": 54 + }, + { + "text": "2 million people have been displaced, thousands killed.", + "length": 55 + }, + { + "text": "ISIS is the enemy of anyone who does not belong to ISIS.", + "length": 56 + }, + { + "text": "To the shock of Europeans, they 
were heard speaking Dutch.", + "length": 58 + }, + { + "text": "It must be bolstered with material and diplomatic support.", + "length": 58 + }, + { + "text": "There are hundreds of Germans, Spaniards, Belgians, French.", + "length": 59 + }, + { + "text": "does not stop helping those fighting to stop ISIS advances.", + "length": 59 + }, + { + "text": "There are few people on Earth who are not horrified by ISIS.", + "length": 60 + }, + { + "text": "If it does, the consequences will become even more catastrophic.", + "length": 64 + }, + { + "text": "When ISIS calls itself a state, it is not hyperbole by very much.", + "length": 65 + }, + { + "text": "\" ISIS can simply not be allowed to keep a foothold in the Middle East.", + "length": 71 + }, + { + "text": "Before ISIS, we knew that human beings are capable of unspeakable brutality.", + "length": 76 + }, + { + "text": "Britain has confirmed that Foley's killer was most likely a British citizen.", + "length": 76 + }, + { + "text": "They kill minorities, Shiite Muslims and Sunnis who don't abide by their views.", + "length": 79 + }, + { + "text": "Graduates of the Syria war, from where ISIS pushed into Iraq, have killed in Europe.", + "length": 84 + }, + { + "text": "The strategy of supporting the Kurds and the Iraqis in the front lines is a good one.", + "length": 85 + }, + { + "text": "In June 2013, a video from Syria surfaced, showing men cutting off another man's head.", + "length": 86 + }, + { + "text": "But it seems no group has advertised its bloodlust with such relish and effectiveness.", + "length": 86 + }, + { + "text": "If it proves insufficient to turning back the bloody ISIS tide, then it must be revamped.", + "length": 89 + }, + { + "text": "The grand mufti of Saudi Arabia, Abdul Aziz al-Sheikh, called ISIS and al Qaeda \"Enemy No.", + "length": 90 + }, + { + "text": "\" President Obama said Wednesday that \"We will do everything we can to protect our people ...", + "length": 93 + }, + { + "text": "The 
ISIS members who hold European passports are able to travel freely across Europe and the U.", + "length": 95 + }, + { + "text": ", to modernity and to anyone who sees the world differently from their narrow medieval perspective.", + "length": 99 + }, + { + "text": "should make a strong diplomatic push to obtain international legitimacy for the campaign to defeat ISIS.", + "length": 104 + }, + { + "text": "That suffering has now extended to Iraq, and it will only become more widespread if ISIS is not stopped.", + "length": 104 + }, + { + "text": "The sickening execution, recorded and released online for the world to see, came with a warning to the U.", + "length": 105 + }, + { + "text": "Foley's mother said her son gave his life trying to expose to the world the suffering of the Syrian people.", + "length": 107 + }, + { + "text": "More important, those using these methods, embracing this philosophy, are in control of enormous territories.", + "length": 109 + }, + { + "text": "It is now financially self-sufficient, collecting millions of dollars every day from oil smuggling operations.", + "length": 110 + }, + { + "text": "There have been reports of hundreds, even thousands, of Europeans training, fighting and killing alongside ISIS.", + "length": 112 + }, + { + "text": "But anyone who thought man's inhumanity to man had eased after the mass crimes of the 20th century now knows better.", + "length": 116 + }, + { + "text": "ISIS has established and gained full dominion not only of cities and populations but of wealthy oil-producing lands.", + "length": 116 + }, + { + "text": "effort should keep a special focus on helping America's loyal and ideologically moderate friends, the Kurds of Iraq.", + "length": 116 + }, + { + "text": ": ISIS showed another captive American journalist, believed to be Steven Sotloff, and threatened to kill him too if the U.", + "length": 122 + }, + { + "text": "resolution declaring that the international community, including the Muslim world, 
considers ISIS and its methods repugnant.", + "length": 124 + }, + { + "text": "The killing of Foley, an idealistic journalist, sharpens our understanding of the organization seeking to dominate the Middle East.", + "length": 131 + }, + { + "text": "Any country that disagrees, any government that is not revolted by ISIS and troubled by its methods and its goals, should go on record saying so.", + "length": 145 + }, + { + "text": "As it makes increasingly clear what kind of an organization it is, ISIS is sending a message: \"Stay out of this, so we can keep driving toward our objective.", + "length": 157 + }, + { + "text": "Last month, ISIS flags flew in an anti-Israel demonstration in the Hague, chanting against America and the West and most enthusiastically, \"Death to the Jews.", + "length": 158 + }, + { + "text": "ISIS has taken over a a tract of land bigger than many countries, something that al Qaeda, its comparatively mild-mannered inspiration, never came close to achieving.", + "length": 166 + }, + { + "text": "The killing and the threat, along with all the evidence ISIS is leaving as it gouges its way across the region, are a direct challenge to the American people, to the U.", + "length": 168 + }, + { + "text": "Its leader, Abu Bakr al-Baghdadi, incidentally, claims to rule over all Muslims and believes the ultimate goal of ISIS is to take over huge sections of Asia, Europe and Africa.", + "length": 176 + }, + { + "text": "ISIS is not the first to murder victims in large numbers; it is not the first to kill those who disagree with its beliefs or who belong to different ethnic or religious groups.", + "length": 176 + }, + { + "text": "ISIS didn't just remind us how cruel humans can be; it has taken the use of brutality as a weapon of intimidation, extermination, genocide and recruitment propaganda to new levels.", + "length": 180 + }, + { + "text": "It is politically and strategically complicated, because ISIS is also fighting Syrian President Bashar 
al-Assad and Hezbollah, and defeating ISIS would also be enormously pleasing to Iran.", + "length": 188 + }, + { + "text": "It is important to prevent ISIS from scoring a recruiting victory among Muslims and anti-Western and anti-American camps by portraying this as a war between Islam and the West, which it is not.", + "length": 193 + }, + { + "text": "That the man who murdered him might have been British should erase any remaining fantasy in the West that this gruesome war, now raging in Syria and Iraq, will stay within any country's or any region's borders.", + "length": 210 + }, + { + "text": "If not stopped, it could continue its push toward the oil fields of southern Iraq at the edge of the Persian Gulf, which remains the epicenter of oil and gas production that allows the global economy to function.", + "length": 212 + }, + { + "text": "(CNN) -- American journalist James Foley was murdered, beheaded by an English-speaking member of ISIS, the extremist group that calls itself the Islamic State and has already conquered large swaths of two Middle Eastern countries.", + "length": 230 + }, + { + "text": "First, obviously, it cannot give into ISIS threats and must continue helping dislodge ISIS from northern Iraq where it is engaging in ethnic cleansing against Christians and other minorities; kidnapping, raping and selling women; and massacring people.", + "length": 252 + }, + { + "text": "ISIS views the videos of mass executions, of severed heads on poles and of crucified men, as a way to keep its enemies frightened and weakened, and a way to tell prospective recruits that it is fearless in its war to create an Islamic caliphate ruling over all the world's Muslims.", + "length": 281 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8319923281669617 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:07.62809244Z", + "first_section_created": "2025-12-23T09:33:07.628500058Z", + "last_section_published": 
"2025-12-23T09:33:07.628882075Z", + "all_results_received": "2025-12-23T09:33:07.756539856Z", + "output_generated": "2025-12-23T09:33:07.756771166Z", + "total_processing_time_ms": 128, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 127, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:07.628500058Z", + "publish_time": "2025-12-23T09:33:07.628721668Z", + "first_worker_start": "2025-12-23T09:33:07.629395297Z", + "last_worker_end": "2025-12-23T09:33:07.755586Z", + "total_journey_time_ms": 127, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:07.6294553Z", + "start_time": "2025-12-23T09:33:07.629546104Z", + "end_time": "2025-12-23T09:33:07.629640808Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:07.62964Z", + "start_time": "2025-12-23T09:33:07.629859Z", + "end_time": "2025-12-23T09:33:07.755586Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 125 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:07.629418798Z", + "start_time": "2025-12-23T09:33:07.629482901Z", + "end_time": "2025-12-23T09:33:07.629607307Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:07.629332995Z", + "start_time": "2025-12-23T09:33:07.629395297Z", + "end_time": "2025-12-23T09:33:07.629469201Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:07.62876427Z", + "publish_time": "2025-12-23T09:33:07.628882075Z", + "first_worker_start": "2025-12-23T09:33:07.629536204Z", + "last_worker_end": "2025-12-23T09:33:07.717404Z", + "total_journey_time_ms": 88, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": 
"2025-12-23T09:33:07.629477401Z", + "start_time": "2025-12-23T09:33:07.629536204Z", + "end_time": "2025-12-23T09:33:07.629643508Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:07.629703Z", + "start_time": "2025-12-23T09:33:07.629822Z", + "end_time": "2025-12-23T09:33:07.717404Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 87 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:07.629494802Z", + "start_time": "2025-12-23T09:33:07.629548304Z", + "end_time": "2025-12-23T09:33:07.629597606Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:07.629499402Z", + "start_time": "2025-12-23T09:33:07.629571905Z", + "end_time": "2025-12-23T09:33:07.629590406Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 212, + "min_processing_ms": 87, + "max_processing_ms": 125, + "avg_processing_ms": 106, + "median_processing_ms": 125, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3364, + "slowest_section_id": 0, + "slowest_section_time_ms": 127 + } +} diff --git a/data/output/002175ac42ef0c91b9fb7e07259413a8ee3979a3.json b/data/output/002175ac42ef0c91b9fb7e07259413a8ee3979a3.json new file mode 100644 index 0000000..ee7e5ff --- /dev/null +++ b/data/output/002175ac42ef0c91b9fb7e07259413a8ee3979a3.json @@ -0,0 +1,464 @@ +{ + "file_name": "002175ac42ef0c91b9fb7e07259413a8ee3979a3.txt", + "total_words": 1149, + "top_n_words": [ + { + "word": "the", + "count": 48 + }, + { + "word": "a", + "count": 37 + }, + { + "word": "to", + "count": 36 + }, + { + "word": "in", + "count": 31 + }, + { + "word": "and", + "count": 22 + }, + { + "word": "of", + "count": 22 + }, + { + "word": "s", + "count": 22 + }, + { + "word": "for", + "count": 20 + }, + { + "word": "is", + "count": 19 + }, + { + "word": "egypt", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "there is none.", + "length": 14 + }, + { + "text": "And forget technology.", + "length": 22 + }, + { + "text": "Still, she continues to push on.", + "length": 32 + }, + { + "text": "Their scores chart their future.", + "length": 32 + }, + { + "text": "She wants to open a school one day.", + "length": 35 + }, + { + "text": "We'll be allowed to draw with color.", + "length": 36 + }, + { + "text": "\"So, that's what we're trying to do.", + "length": 36 + }, + { + "text": "\"So most of us just sat on the floor.", + "length": 37 + }, + { + "text": "How to help | Take action with 10x10 .", + "length": 38 + }, + { + "text": "But she has big dreams about education.", + "length": 39 + }, + { + "text": "\" It sounds like an almost hopeless picture.", + "length": 44 + }, + { + "text": "And now, despite a revolution for dignity ...", + "length": 45 + }, + { + "text": "lessons mandated by the government curriculum.", + "length": 46 + }, + { + "text": "The American pop culture reference is lost on her.", + "length": 50 + }, + { 
+ "text": "Desks and a stable electricity supply are luxuries.", + "length": 51 + }, + { + "text": "Also, 70% of young women in Upper Egypt are jobless.", + "length": 52 + }, + { + "text": "The status quo is even more somber for Egypt's women.", + "length": 53 + }, + { + "text": "\"I have kids of my own I'm struggling to take care for.", + "length": 55 + }, + { + "text": "\"Mubarak's regime trained students to be loyal citizens.", + "length": 56 + }, + { + "text": "More: Interactive -- Impossible odds, unstoppable girls .", + "length": 57 + }, + { + "text": "If they don't do well, they won't get a place in college.", + "length": 57 + }, + { + "text": "Public school teachers rarely make more than $300 a month.", + "length": 58 + }, + { + "text": "She is waiting listlessly for a lesson with her math tutor.", + "length": 59 + }, + { + "text": "But they quickly fall silent when pushed to articulate plans.", + "length": 61 + }, + { + "text": "\"Teachers will show up and we'll be allowed to ask questions.", + "length": 61 + }, + { + "text": "Open letter from Christiane Amanpour: It's time to power the world .", + "length": 68 + }, + { + "text": "By late May, Nafham's YouTube channel had more than 1 million views.", + "length": 68 + }, + { + "text": "Egypt's final secondary school exams are a rite of passage for students.", + "length": 72 + }, + { + "text": "Egypt's literacy rate is 66%, according to a 2011 United Nations report.", + "length": 72 + }, + { + "text": "More: CNN's \"Girl Rising\" Interactive: Impossible odds, unstoppable girls .", + "length": 75 + }, + { + "text": "Many schools look more like rank penitentiaries rather than hubs of learning.", + "length": 77 + }, + { + "text": "\"At my school, we'll learn,\" she says, brushing her hands longingly over the slide.", + "length": 83 + }, + { + "text": "\"We didn't have enough desks last year,\" recalls Asmaa's 12-year-old neighbor, Omnia.", + "length": 85 + }, + { + "text": "Ramadan says she's running into 
problems, however, while trying to implement her idea.", + "length": 86 + }, + { + "text": "\"There are too many issues to deal with,\" said one 32-year-old teacher in Asmaa's village.", + "length": 90 + }, + { + "text": "However, as Egypt's public education system founders, a few innovative ideas have emerged.", + "length": 90 + }, + { + "text": "The bureaucracy in Egypt leaves little room for productivity, let alone creativity and innovation.", + "length": 98 + }, + { + "text": "Students and teachers seem to be on the verge of exhaustion rather than bursting with inspiration.", + "length": 98 + }, + { + "text": "For parents with any hope that their children will be better off, investing in education is essential.", + "length": 102 + }, + { + "text": "\" He says he gives three hours of private tutoring in the evening and does mechanical work on the side.", + "length": 103 + }, + { + "text": "It's impossible to know how much money is spent in all, but some estimates put the total at $1 billion a year.", + "length": 110 + }, + { + "text": "It is also in talks with some companies to offer USBs with Internet access to groups throughout the countryside.", + "length": 112 + }, + { + "text": "We only get a little paper, but my mom found this,\" she said, holding up a small, faded \"Hannah Montana\" notebook.", + "length": 114 + }, + { + "text": "The bright-eyed teenager lives in a sepia-toned village in the province of Qena, a place of rural poverty and neglect.", + "length": 118 + }, + { + "text": "Politicians, whether they're from the ruling Muslim Brotherhood or the opposition, agree that educational reform is needed.", + "length": 123 + }, + { + "text": "The situation is worst in regions far from the capital, and in Upper Egypt, where more than half the population is under 29.", + "length": 124 + }, + { + "text": "\"We could all stand around and protest that the government is failing us, or we could go out and offer the solutions,\" she said.", + "length": 128 + }, + 
{ + "text": "This year, a few Egyptian entrepreneurs have launched Nafham, a Web-based startup that features crowd-sourced educational videos.", + "length": 129 + }, + { + "text": "Nafham, which means \"We understand\" in Arabic, hopes to provide an alternative -- a virtual classroom -- for struggling Egyptian families.", + "length": 138 + }, + { + "text": "Qena, Egypt (CNN) -- In a deserted playground a few hundred miles south of Cairo, 13-year-old Asmaa Ashraf fiddles with a broken rusted slide.", + "length": 142 + }, + { + "text": "More than a few of them say they teach the bare minimum in class so that they can earn more from the same students in private tutoring sessions.", + "length": 144 + }, + { + "text": "Two and a half years after the country's uprising began, Egypt's fledgling democracy is stillborn, stubbornly stuck between its past and future.", + "length": 144 + }, + { + "text": "One politician said the country simply has \"bigger fish to fry,\" with a controversial new constitution and still no full, functioning parliament.", + "length": 145 + }, + { + "text": "\" Such aspirations, however, amount to fantasy for most youth in a country still struggling to land on its feet after being turned completely upside down.", + "length": 154 + }, + { + "text": "\" But until those solutions are offered, Asmaa -- and a whole generation in waiting -- will continue to linger near broken slides, daydreaming about the future.", + "length": 160 + }, + { + "text": "In the World Economic Forum's latest report on global competitiveness, Egypt ranked near the bottom -- 131st out of 144 countries -- for quality of primary education.", + "length": 166 + }, + { + "text": "To make up for the gaps in education, millions of middle-class Egyptian families spend a large part of their income -- sometimes as much as 25% -- on private tutoring.", + "length": 167 + }, + { + "text": "Meanwhile, a report by London think tank Chatham House says just $129 a year is spent on each 
Egyptian student; the United States, for example, spends 40 times as much.", + "length": 168 + }, + { + "text": "Since the website went live in October, Nafham's staff of teachers created around 4,900 videos, while 1,000 videos were crowd-sourced -- reviewed and approved by the staff.", + "length": 172 + }, + { + "text": "For the 65% of Egyptians who don't have Internet access -- those who stand to gain the most from the service -- Nafham says it hopes to form group viewings in some villages.", + "length": 173 + }, + { + "text": "Another innovative initiative is Teach for Egypt, a start-up created by Nada Ramadan, a 24-year-old Egyptian who's a graduate student at Georgetown University in Washington.", + "length": 173 + }, + { + "text": "According to a recent World Bank report, the illiteracy rate for young people in Upper Egypt is 17%, higher than the national average of 11%, and the illiteracy rates for females is 24%, almost twice that of males.", + "length": 214 + }, + { + "text": "But with unemployment at staggering rates -- 33% for men age 20-24 and 53% for women in the same age -- Egypt has a highly combustible pool of frustrated and disenfranchised youth in danger of becoming a lost generation.", + "length": 220 + }, + { + "text": "And as the government struggles to wade through the country's protracted political problems, Egypt's festering education system is orphaned -- even though, with a growing youth population, it's key to the country's future.", + "length": 222 + }, + { + "text": "\"This is a generation that desperately needs to learn how to critically think, to learn how to be in the 21st century,\" said Malak Zalouk, director of the Middle East Institute for Higher Education at the American University in Cairo.", + "length": 234 + }, + { + "text": "Based on the Teach for America model, Ramadan plans to recruit ambitious college graduates -- most from within the Egyptian community and diaspora -- to commit to a two-year service in which they are 
trained extensively and placed in underprivileged schools.", + "length": 258 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5678609609603882 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:08.129852876Z", + "first_section_created": "2025-12-23T09:33:08.130180591Z", + "last_section_published": "2025-12-23T09:33:08.130519705Z", + "all_results_received": "2025-12-23T09:33:08.254911043Z", + "output_generated": "2025-12-23T09:33:08.255173255Z", + "total_processing_time_ms": 125, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 124, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:08.130180591Z", + "publish_time": "2025-12-23T09:33:08.1303887Z", + "first_worker_start": "2025-12-23T09:33:08.130958625Z", + "last_worker_end": "2025-12-23T09:33:08.204198Z", + "total_journey_time_ms": 74, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:08.131029428Z", + "start_time": "2025-12-23T09:33:08.13109003Z", + "end_time": "2025-12-23T09:33:08.131177334Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:08.131274Z", + "start_time": "2025-12-23T09:33:08.131409Z", + "end_time": "2025-12-23T09:33:08.204198Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:08.131006427Z", + "start_time": "2025-12-23T09:33:08.131057629Z", + "end_time": "2025-12-23T09:33:08.131189935Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:08.130900522Z", + "start_time": "2025-12-23T09:33:08.130958625Z", + "end_time": "2025-12-23T09:33:08.13131424Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + 
"creation_time": "2025-12-23T09:33:08.130422601Z", + "publish_time": "2025-12-23T09:33:08.130519705Z", + "first_worker_start": "2025-12-23T09:33:08.131056329Z", + "last_worker_end": "2025-12-23T09:33:08.253969Z", + "total_journey_time_ms": 123, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:08.131106931Z", + "start_time": "2025-12-23T09:33:08.131147833Z", + "end_time": "2025-12-23T09:33:08.131181334Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:08.131291Z", + "start_time": "2025-12-23T09:33:08.131434Z", + "end_time": "2025-12-23T09:33:08.253969Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 122 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:08.131006427Z", + "start_time": "2025-12-23T09:33:08.131056329Z", + "end_time": "2025-12-23T09:33:08.13109383Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:08.131065029Z", + "start_time": "2025-12-23T09:33:08.13109223Z", + "end_time": "2025-12-23T09:33:08.131125232Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 194, + "min_processing_ms": 72, + "max_processing_ms": 122, + "avg_processing_ms": 97, + "median_processing_ms": 122, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3365, + "slowest_section_id": 1, + "slowest_section_time_ms": 123 + } +} diff --git a/data/output/0021e409ea9f1b68661a18a49dd1136375d6d52c.json b/data/output/0021e409ea9f1b68661a18a49dd1136375d6d52c.json new file mode 100644 index 0000000..2947f25 --- /dev/null +++ b/data/output/0021e409ea9f1b68661a18a49dd1136375d6d52c.json @@ -0,0 +1,408 @@ +{ + "file_name": "0021e409ea9f1b68661a18a49dd1136375d6d52c.txt", + "total_words": 1214, + "top_n_words": [ + { + "word": "the", + "count": 57 + }, + { + "word": "i", + "count": 35 + }, + { + "word": "and", + "count": 32 + }, + { + "word": "to", + "count": 31 + }, + { + "word": "of", + "count": 29 + }, + { + "word": "a", + "count": 26 + }, + { + "word": "in", + "count": 23 + }, + { + "word": "it", + "count": 20 + }, + { + "word": "that", + "count": 20 + }, + { + "word": "was", + "count": 19 + } + ], + "sorted_sentences": [ + { + "text": "co.", + "length": 3 + }, + { + "text": "long-term aim.", + "length": 14 + }, + { + "text": "Visit investec.", + "length": 15 + }, + { + "text": "I auctioned the wristbands.", + "length": 27 + }, + { + "text": "VIDEO Ali targets ODI place .", + "length": 29 + }, + { + "text": "‘I don’t know what it was.", + "length": 30 + }, + { + "text": "'I still believe what I believe in.", + "length": 35 + }, + { + "text": "uk/cricket or follow @InvestecCricket.", + "length": 38 + }, + { + "text": "I was pleased some good came out of it.", + "length": 39 + }, + { + "text": "Maybe because my family are from Kashmir.", + "length": 41 + }, + { + "text": "At the moment I don’t feel I need it 
anyway.", + "length": 46 + }, + { + "text": "But, then again, Moeen is no ordinary cricketer.", + "length": 48 + }, + { + "text": "‘I’ve done that for much of my career at Worcester.", + "length": 55 + }, + { + "text": "Ali is set to star for England at February's World Cup .", + "length": 56 + }, + { + "text": "But there’s definitely more that people like me can do.", + "length": 57 + }, + { + "text": "It just made me feel that a lot more work needs to be done.", + "length": 59 + }, + { + "text": "’ Not if he carries on as he has done this year he won’t.", + "length": 61 + }, + { + "text": "‘I don’t want to risk not playing for England because of that.", + "length": 66 + }, + { + "text": "They really express themselves and that’s what we need to do too.", + "length": 67 + }, + { + "text": "And maybe it he does then even his home crowd will cheer Moeen Ali.", + "length": 67 + }, + { + "text": "Ali impressed hugely with the ball during a summer of ups and downs for Peter Moores' England .", + "length": 95 + }, + { + "text": "A guy paid £500 for them for the charity of his choice and I had dinner with him in Birmingham.", + "length": 96 + }, + { + "text": "Ali revealed that his dad was 'very upset' at the abuse he received from those in the Midlands .", + "length": 96 + }, + { + "text": "*Investec, the specialist bank and asset manager, is the title sponsor of Test cricket in England.", + "length": 98 + }, + { + "text": "’ So what more can be done to convince more British Asians to support the country of their birth?", + "length": 99 + }, + { + "text": "'I still have my views and opinions but I must be more wary of expressing them on the field,' said Moeen.", + "length": 105 + }, + { + "text": "England all-rounder Moeen Ali expressed his disappointment at being booed in Birmingham over the summer .", + "length": 105 + }, + { + "text": "They just went out and backed themselves and you could see a massive difference in the way they batted compared to us.", + "length": 
118 + }, + { + "text": "‘My dad was very upset because it was the first time the family had watched me play for England at Edgbaston,’ said Moeen.", + "length": 126 + }, + { + "text": "I actually think alcohol played a large part but I’ll never forget it and I was just glad to field the last ball when we won.", + "length": 127 + }, + { + "text": "The thing is, when you play in India they cheer everybody and I’ve had Indian friends sort of apologise to me about what happened.", + "length": 132 + }, + { + "text": "Moeen believes that England can exceed expectations by abandoning the traditional cautious mind-set and replacing it with a fearless approach.", + "length": 142 + }, + { + "text": "‘I know a lot of people are writing us off but I feel we have a good chance because of the amount of one-day cricket we are now playing before then.", + "length": 150 + }, + { + "text": "‘I’d love to open,’ said Moeen, who could become an alternative to Alex Hales as Alastair Cook’s partner if the big-hitting Notts man does not come off.", + "length": 160 + }, + { + "text": "‘I’m not going to bowl it because of the scrutiny,’ said Moeen, who confounded suggestions that he was only a ‘part-time’ spinner throughout his fruitful summer.", + "length": 171 + }, + { + "text": "‘I didn’t play in the first three games of our series against India so I watched the way they batted and the biggest thing I got out of that was how fearless they were.", + "length": 172 + }, + { + "text": "‘People have a right to support who they want, of course, but it was a big shame but I’m hoping in the future maybe they or their kids will become England fans or players.", + "length": 175 + }, + { + "text": "’ Moeen, 27, is a candidate to bat at three in Sri Lanka and the World Cup or fill an all-rounder’s role at seven but what he would really like is to go in at the very top.", + "length": 176 + }, + { + "text": "There cannot have been many sportsmen who have risen as spectacularly as Moeen Ali and then 
been heartily booed by the bulk of his home crowd at the end of a breakthrough summer.", + "length": 178 + }, + { + "text": "‘Sometimes in Asian homes it is about where we’ve come from, which is important too, but it is about where you live and where you’re born and the people who are the same as you.", + "length": 183 + }, + { + "text": "‘I’ve thought about it and I think players like myself and Ravi Bopara need to get out there and tell people that it is about playing for our country and that means playing for England.", + "length": 189 + }, + { + "text": "’ If Moeen was not hurt by the reception then his father Munir, himself born in Birmingham and a tireless worker for disadvantaged youngsters in the area through his academy, certainly was.", + "length": 191 + }, + { + "text": "‘It’s a shame because personally I feel it is a great skill to turn the ball both ways as an off-spinner and I always thought the doosra was a great innovation but I don’t want to be banned.", + "length": 196 + }, + { + "text": "’ Another controversy to affect Moeen last summer was his decision to wear pro-Palestinian wristbands during England's Test against India at Southampton, a move which led to him being ticked off by the ICC.", + "length": 208 + }, + { + "text": "A crackdown on unorthodox actions by the International Cricket Council has seen a number of spinners, notably Moeen’s mentor Saeed Ajmal, banned from bowling and the all-rounder does not want to risk going the same way.", + "length": 221 + }, + { + "text": "After all, he was coming to the end of a season which saw him emerge as not only one of the most exciting talents in the English game but also the perfect role model for British Asians from similar inner-city backgrounds.", + "length": 221 + }, + { + "text": "Now he is part of an England squad that leave for Sri Lanka on Sunday for a seven-match 50-over series that serves as a warm-up for a World Cup in Australia and New Zealand in the New Year that few expect them to 
thrive in.", + "length": 223 + }, + { + "text": "' He has done quite a lot already both on and off the field, not least in impressing with bat, notably a maiden Test century of rare class against Sri Lanka, and with the ball in taking 19 wickets in the 3-1 success against India.", + "length": 230 + }, + { + "text": "The Birmingham born-and-bred and very British Muslim did not expect the torrent of abuse he received at Edgbaston from the Indian supporting majority of the sell-out crowd when he played for England in their Twenty20 international.", + "length": 231 + }, + { + "text": "‘I won’t say that it hurt but it was disappointing that I live 10 minutes from Edgbaston and I’m getting booed by people I feel I’m supposed to represent,’ said Moeen at Lord’s as he prepared to join England’s one-day tour of Sri Lanka.", + "length": 250 + }, + { + "text": "Yet the sheer scale of the booing, greater than that received by any other English cricketer with an Asian background and predominantly because of his Pakistani heritage, was evidence that he has much to do to convince those like him to follow him and embrace the domestic game.", + "length": 278 + }, + { + "text": "’ When Moeen burst on to the scene post England’s Ashes disaster much was made of his ability as an off-spinner to bowl the doosra, the ball that goes away from the right-hander, but it was rarely seen while he was bamboozling India and now he has reluctantly put it back in his locker, probably for good.", + "length": 309 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5239169150590897 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:08.631240196Z", + "first_section_created": "2025-12-23T09:33:08.631543909Z", + "last_section_published": "2025-12-23T09:33:08.632054731Z", + "all_results_received": "2025-12-23T09:33:08.714729146Z", + "output_generated": "2025-12-23T09:33:08.714975256Z", + "total_processing_time_ms": 83, + "file_splitting_time_ms": 0, 
+ "workers_processing_time_ms": 82, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:08.631543909Z", + "publish_time": "2025-12-23T09:33:08.631941226Z", + "first_worker_start": "2025-12-23T09:33:08.63224654Z", + "last_worker_end": "2025-12-23T09:33:08.707739Z", + "total_journey_time_ms": 76, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:08.632376045Z", + "start_time": "2025-12-23T09:33:08.632464449Z", + "end_time": "2025-12-23T09:33:08.632562953Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:08.632636Z", + "start_time": "2025-12-23T09:33:08.632804Z", + "end_time": "2025-12-23T09:33:08.707739Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 74 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:08.63225454Z", + "start_time": "2025-12-23T09:33:08.632326043Z", + "end_time": "2025-12-23T09:33:08.632438448Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:08.632164236Z", + "start_time": "2025-12-23T09:33:08.63224654Z", + "end_time": "2025-12-23T09:33:08.632320643Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:08.631993829Z", + "publish_time": "2025-12-23T09:33:08.632054731Z", + "first_worker_start": "2025-12-23T09:33:08.632453349Z", + "last_worker_end": "2025-12-23T09:33:08.713827Z", + "total_journey_time_ms": 81, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:08.632455349Z", + "start_time": "2025-12-23T09:33:08.63249255Z", + "end_time": "2025-12-23T09:33:08.632523052Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + 
"receive_time": "2025-12-23T09:33:08.632723Z", + "start_time": "2025-12-23T09:33:08.632865Z", + "end_time": "2025-12-23T09:33:08.713827Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 80 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:08.632422147Z", + "start_time": "2025-12-23T09:33:08.632453349Z", + "end_time": "2025-12-23T09:33:08.63249255Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:08.632419047Z", + "start_time": "2025-12-23T09:33:08.632454049Z", + "end_time": "2025-12-23T09:33:08.632535052Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 154, + "min_processing_ms": 74, + "max_processing_ms": 80, + "avg_processing_ms": 77, + "median_processing_ms": 80, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3218, + "slowest_section_id": 1, + "slowest_section_time_ms": 81 + } +} diff --git a/data/output/0021eb1696f522ae0605ba630568bed5f655b740.json 
b/data/output/0021eb1696f522ae0605ba630568bed5f655b740.json new file mode 100644 index 0000000..6a7d2b6 --- /dev/null +++ b/data/output/0021eb1696f522ae0605ba630568bed5f655b740.json @@ -0,0 +1,330 @@ +{ + "file_name": "0021eb1696f522ae0605ba630568bed5f655b740.txt", + "total_words": 513, + "top_n_words": [ + { + "word": "the", + "count": 40 + }, + { + "word": "s", + "count": 13 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "u", + "count": 11 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "russian", + "count": 7 + }, + { + "word": "was", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "A U.", + "length": 4 + }, + { + "text": "The U.", + "length": 6 + }, + { + "text": "The U.", + "length": 6 + }, + { + "text": "Russian and U.", + "length": 14 + }, + { + "text": "\"We acknowledge a U.", + "length": 20 + }, + { + "text": "State Department said.", + "length": 22 + }, + { + "text": "official said Saturday.", + "length": 23 + }, + { + "text": "military plane in April .", + "length": 25 + }, + { + "text": "military official told CNN.", + "length": 27 + }, + { + "text": "plane and Russia over the past few months.", + "length": 42 + }, + { + "text": "Russian fighter jet nearly collided with U.", + "length": 43 + }, + { + "text": "Malaysia Airlines Flight 17 was brought down by a suspected missile.", + "length": 68 + }, + { + "text": "official acknowledged that was done without Swedish military approval.", + 
"length": 70 + }, + { + "text": "This was at least the second potentially-dangerous encounter between a U.", + "length": 73 + }, + { + "text": "The quickest route away from the Russians took them into Swedish airspace.", + "length": 74 + }, + { + "text": "The incident was first reported by the Swedish news agency Svenska Dagbladet.", + "length": 77 + }, + { + "text": "Russian officials did not provide any immediate reaction about the encounter.", + "length": 77 + }, + { + "text": "On April 23, a Russian Su-27 Flanker fighter jet buzzed within 100 feet of the nose of a U.", + "length": 91 + }, + { + "text": "But the official said the land radar activity by the Russians in this instance was unusual.", + "length": 91 + }, + { + "text": "The Russians then sent at least one fighter jet into the sky to intercept the aircraft, the U.", + "length": 94 + }, + { + "text": "The RC-135 Rivet Joint fled into nearby Swedish airspace without that country's permission, a U.", + "length": 96 + }, + { + "text": "aircraft often encounter each other, both in Northern Europe as well as the area between the Russian Far East and Alaska.", + "length": 121 + }, + { + "text": "The airplane may have gone through other countries' airspace as well, though it's not clear if it had permission to do so.", + "length": 122 + }, + { + "text": "Air Force RC-135U reconnaissance plane over the Sea of Okhotsk between Russia and Japan, a Defense Department official said.", + "length": 124 + }, + { + "text": "The Cold War aerial games of chicken portrayed in the movie \"Top Gun\" are happening in real life again nearly 30 years later.", + "length": 125 + }, + { + "text": "The spy plane crew felt so concerned about the radar tracking that it wanted to get out of the area as quickly as possible, the official said.", + "length": 142 + }, + { + "text": "Pro-Russia rebels have denied allegations from Ukraine and the West that they shot down the Malaysian airliner, or that Russia supplied equipment used to 
shoot it down.", + "length": 168 + }, + { + "text": "The ongoing civil unrest in Ukraine and the downing of MH 17 over eastern Ukraine on July 17, which killed all 298 people aboard, have heightened tensions between Washington and Moscow.", + "length": 185 + }, + { + "text": "aircraft veered into Swedish airspace and will take active steps to ensure we have properly communicated with Swedish authorities in advance to prevent similar issues before they arise,\" the U.", + "length": 193 + }, + { + "text": "plane had been flying in international airspace, conducting an electronic eavesdropping mission on the Russian military, when the Russians took the unusual action of beginning to track it with land-based radar.", + "length": 210 + }, + { + "text": "As a result of this incident, the United States is discussing the matter with Sweden and letting officials know there may be further occurrences where American jets have to divert so quickly they may not be able to wait for permission.", + "length": 235 + }, + { + "text": "Air Force spy plane evaded an encounter with the Russian military on July 18, just a day after Malaysia Airlines Flight 17 was downed by a suspected surface-to-air missile that Ukraine and the West allege was fired by pro-Russia rebels in eastern Ukraine.", + "length": 255 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6604731678962708 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:09.132798923Z", + "first_section_created": "2025-12-23T09:33:09.133122737Z", + "last_section_published": "2025-12-23T09:33:09.133295844Z", + "all_results_received": "2025-12-23T09:33:09.196722717Z", + "output_generated": "2025-12-23T09:33:09.196887524Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:09.133122737Z", + "publish_time": 
"2025-12-23T09:33:09.133295844Z", + "first_worker_start": "2025-12-23T09:33:09.13388307Z", + "last_worker_end": "2025-12-23T09:33:09.195741Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:09.13388417Z", + "start_time": "2025-12-23T09:33:09.133946273Z", + "end_time": "2025-12-23T09:33:09.134004175Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:09.134124Z", + "start_time": "2025-12-23T09:33:09.13431Z", + "end_time": "2025-12-23T09:33:09.195741Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:09.133818367Z", + "start_time": "2025-12-23T09:33:09.13388307Z", + "end_time": "2025-12-23T09:33:09.133971374Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:09.13388317Z", + "start_time": "2025-12-23T09:33:09.133972674Z", + "end_time": "2025-12-23T09:33:09.134004675Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2977, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/00224c8551b7b420331c428c6ea737bc61d728fc.json b/data/output/00224c8551b7b420331c428c6ea737bc61d728fc.json new file mode 100644 index 0000000..a0727bb --- /dev/null +++ b/data/output/00224c8551b7b420331c428c6ea737bc61d728fc.json @@ -0,0 +1,338 @@ +{ + "file_name": "00224c8551b7b420331c428c6ea737bc61d728fc.txt", + "total_words": 795, + "top_n_words": [ + { + "word": "the", + "count": 58 + }, + { + "word": "in", + "count": 31 + }, + { + "word": "of", + "count": 19 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "per", + "count": 14 + }, + { + "word": "online", + "count": 13 + }, + { + "word": "it", + "count": 12 + }, + { + "word": "was", + "count": 11 + }, + { + "word": "cent", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "04.", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "74 in the US.", + "length": 13 + }, + { + "text": "we watch television.", + "length": 20 + }, + { + "text": "Japan came in second with £7.", + "length": 30 + }, + { + "text": "The study, which was compiled by .", + "length": 34 + }, + { + "text": "across the world, including the U.", + "length": 34 + }, + { + "text": ", much of Europe and the Far East.", + "length": 34 + }, + { + "text": "The internet is also changing the way .", + "length": 39 + }, + { + "text": "Sean Poulter, Consumer Affairs Editor .", + "length": 39 + }, + { + "text": "50 per head, while it was an average of £6.", + "length": 44 + }, + { + "text": "The firm may 
even start using delivery drones .", + "length": 47 + }, + { + "text": "The British are most likely to access TV content .", + "length": 50 + }, + { + "text": "Total online web spending in this country was £74.", + "length": 51 + }, + { + "text": "Relentless: Amazon staff pick up an order every 33 seconds.", + "length": 59 + }, + { + "text": "over the web, with over a third (36 per cent) doing so every week.", + "length": 66 + }, + { + "text": "Ofcom, compared the internet, mobile and TV services across 17 countries .", + "length": 74 + }, + { + "text": "At the other end of the scale, the figure was just £180 in Spain and £126 in Italy.", + "length": 85 + }, + { + "text": "3billion in 2012 and it appears likely to rise by at least another 10per cent this year.", + "length": 88 + }, + { + "text": "’ The figure of at £1,175 per head in 2012 compares to £867 in Australia and £663 in the USA.", + "length": 98 + }, + { + "text": "Booming: Britons spend more than £1,200 each online every year, the most of any nation surveyed .", + "length": 98 + }, + { + "text": "At one time, it was believed selling clothes online would never work because people like to try things on.", + "length": 106 + }, + { + "text": "Derelict: One in seven high street shops is empty, a figure which threatens to rise with more online sales .", + "length": 108 + }, + { + "text": "There have also been complaints about couriers and postmen dumping items in places where they may be stolen.", + "length": 108 + }, + { + "text": "The British were also most likely to trust web retailers and that goods bought online would be delivered speedily.", + "length": 114 + }, + { + "text": "Amazon suffered a backlash earlier this year when it scrapped free delivery on some items that cost less than £10.", + "length": 115 + }, + { + "text": "One in four do so every week, with internet shopping for groceries more developed in the UK than anywhere else in the world.", + "length": 124 + }, + { + "text": "Some 71 per cent 
of mobile users access the internet on their handset, which ranks second only to Spain, where it is 75 per cent.", + "length": 129 + }, + { + "text": "Three in four people (73 per cent) with web access in the UK are buying goods for delivery over the internet at least once a month.", + "length": 131 + }, + { + "text": "The figure is far ahead of the rest of the world, including the USA, and some £307 more than in the second ranked country, Australia.", + "length": 134 + }, + { + "text": "The shift has far-reaching implications for the nation’s high streets where tens of thousands of empty shops will need to be bulldozed.", + "length": 137 + }, + { + "text": "Companies trying to reach shoppers via advertising through smartphones are spending more per head in this country than anywhere else – at £8.", + "length": 144 + }, + { + "text": "The British spend far more online than any other nation in the world at an average of £1,175 per head, amid a dramatic shift in shopping habits.", + "length": 145 + }, + { + "text": "Recent studies suggest the nation’s town centres need to be dramatically remodelled to bring in more housing, schools, gyms and community facilities.", + "length": 151 + }, + { + "text": "The research found that 19 per cent of Britons said they did not shop online more often because of concerns about collecting their item if they missed the delivery.", + "length": 164 + }, + { + "text": "It found that YouTube was the most popular website for online video content, with two-thirds of the nation’s laptop and desktop users visiting the site every month.", + "length": 166 + }, + { + "text": "Most people said the reasons for the shift to the web were because it was cheaper and easier, without the hassle, time and parking costs involved in going in to town.", + "length": 166 + }, + { + "text": "However, there are some concerns about delivery charges with four in ten saying they are too high, while three in ten had some concerns that purchases might not arrive.", 
+ "length": 168 + }, + { + "text": "Ofcom said: ‘In recent years, we have found that the UK is the country where online shopping is most popular, with the highest spend per head of all of our comparator countries.", + "length": 179 + }, + { + "text": "Smartphones, such as the iPhone or Samsung Galaxy, with their access to the internet and online video have been adopted more enthusiastically in Britain than virtually any other country.", + "length": 186 + }, + { + "text": "However the sale of clothes and shoes over the web is now among the fastest growing sectors, thanks to the success of firms like Asos and the department stores, which have easy returns policies.", + "length": 194 + }, + { + "text": "In Britain, some 59 per cent of shoppers said it was cheaper to shop online, while 58 per cent said it was easier, 50 per cent argued it was quicker and 47 per cent felt it offered a better choice.", + "length": 197 + }, + { + "text": "Behemoth: It is good news for the ever-growing online shopping firm Amazon, whose giant 'fulfilment centre' warehouse in Peterborough, pictured, has come in for criticism over its employment practices .", + "length": 202 + }, + { + "text": "Social networks, particularly Facebook, are among the most searched-for terms online, while the  photo sharing website ‘Instagram’ was the fastest growing term in the UK, the US, Canada, and Ireland.", + "length": 204 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.46098315715789795 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:09.634080537Z", + "first_section_created": "2025-12-23T09:33:09.634549858Z", + "last_section_published": "2025-12-23T09:33:09.63481757Z", + "all_results_received": "2025-12-23T09:33:09.699830612Z", + "output_generated": "2025-12-23T09:33:09.70001072Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + 
"section_id": 0, + "creation_time": "2025-12-23T09:33:09.634549858Z", + "publish_time": "2025-12-23T09:33:09.63481757Z", + "first_worker_start": "2025-12-23T09:33:09.63529009Z", + "last_worker_end": "2025-12-23T09:33:09.698923Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:09.635243988Z", + "start_time": "2025-12-23T09:33:09.635312991Z", + "end_time": "2025-12-23T09:33:09.635393595Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:09.635431Z", + "start_time": "2025-12-23T09:33:09.63558Z", + "end_time": "2025-12-23T09:33:09.698923Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:09.635228388Z", + "start_time": "2025-12-23T09:33:09.63529009Z", + "end_time": "2025-12-23T09:33:09.635385694Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:09.635246688Z", + "start_time": "2025-12-23T09:33:09.635313191Z", + "end_time": "2025-12-23T09:33:09.635678307Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4521, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/002277a195d1ebe51a9da6227a6cc44c2e3072c9.json b/data/output/002277a195d1ebe51a9da6227a6cc44c2e3072c9.json new file mode 100644 index 0000000..3309c7d --- /dev/null +++ b/data/output/002277a195d1ebe51a9da6227a6cc44c2e3072c9.json @@ -0,0 +1,294 @@ +{ + "file_name": "002277a195d1ebe51a9da6227a6cc44c2e3072c9.txt", + "total_words": 688, + "top_n_words": [ + { + "word": "the", + "count": 41 + }, + { + "word": "to", + "count": 24 + }, + { + "word": "in", + "count": 23 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "of", + "count": 13 + }, + { + "word": "found", + "count": 10 + }, + { + "word": "with", + "count": 10 + }, + { + "word": "is", + "count": 9 + }, + { + "word": "it", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Nick Mcdermott, Science Reporter .", + "length": 34 + }, + { + "text": "It would be very powerful if we could.", + "length": 38 + }, + { + "text": "We now want to show this works in humans.", + "length": 41 + }, + { + "text": "The often debilitating condition affects 8.", + "length": 43 + }, + { + "text": "‘Although surgery is very successful, it is not really an answer.", + "length": 67 + }, + { + "text": "Earlier studies have linked sulforaphane to breast cancer prevention.", + "length": 69 + }, + { + "text": "An elderly woman with arthritis: the debilitating condition affects 8.", + "length": 70 + }, + { + "text": "Currently, the NHS 
performs over 140,000 hip and knee replacement operations each year.", + "length": 87 + }, + { + "text": "5 million adults, with no cure or effective treatment other than pain relief or joint replacement.", + "length": 98 + }, + { + "text": "5 million adults, with no cure or effective treatment other than pain relief or joint replacement .", + "length": 99 + }, + { + "text": "And it is predicted the number of people seeking treatment will almost double in the next 20 years.", + "length": 99 + }, + { + "text": "Prevention would be preferable and changes to lifestyle, like diet, may be the only way to do that.", + "length": 99 + }, + { + "text": "We have shown that this works in the three laboratory models we have tried, in cartilage cells, tissue and mice.", + "length": 112 + }, + { + "text": "Once you have osteoarthritis, being able to slow its progress and the progression to surgery is really important.", + "length": 113 + }, + { + "text": "In a third test, the team observed a protective effect on cow cartilage tissue that was first given sulforaphane.", + "length": 113 + }, + { + "text": "Brussel sprouts with pancetta: the chemical sulforaphane, found in broccoli, is also found in Brussels sprouts and cabbage .", + "length": 124 + }, + { + "text": "Human cartilage cells were also exposed to the chemical and found the genes responsible for cartilage damage were switched off.", + "length": 127 + }, + { + "text": "Sulforaphane - a chemical found mainly in broccoli - reduces joint damage, scientists from the University of East Anglia found .", + "length": 128 + }, + { + "text": "Lead researcher Ian Clark, professor of musculoskeletal biology at UEA, said: ‘The results from this study are very promising.", + "length": 128 + }, + { + "text": "The chemical may also help to reduce the risk of heart attack or stroke by boosting the body’s defence system to keep arteries unclogged.", + "length": 139 + }, + { + "text": "It may not help parents convince their unruly children 
to eat more greens, but broccoli could hold the key to preventing painful arthritis.", + "length": 139 + }, + { + "text": "It has been found to target the cells that fuel the growth of tumours, preventing the cancer from developing, or spreading when it is established.", + "length": 146 + }, + { + "text": "‘As well as treating those who already have the condition, you need to be able to tell healthy people how to protect their joints into the future.", + "length": 148 + }, + { + "text": "’ Aging and obesity are the most common contributors to the condition, with one in five people over the age of 45 suffering with knee osteoarthritis.", + "length": 151 + }, + { + "text": "In tests involving mice, those fed a diet rich in the compound suffered significantly less wear to their joints, helping protect against osteoarthritis.", + "length": 152 + }, + { + "text": "A compound found in abundance in the vegetable superfood slows down the destruction of cartilage in joints associated with osteoarthritis, according to a new study.", + "length": 164 + }, + { + "text": "There is currently no way in to the disease pharmaceutically and you cannot give healthy people drugs unnecessarily, so this is where diet could be a safe alternative.", + "length": 167 + }, + { + "text": "Scientists from the University of East Anglia found sulforaphane - a chemical found mainly in broccoli but also in Brussels sprouts and cabbage - reduces joint damage.", + "length": 167 + }, + { + "text": "The team will now undertake on a small scale trial of human patients with knee osteoarthritis who are due to have surgery to see if the eating broccoli has a beneficial effect on their joint health.", + "length": 198 + }, + { + "text": "‘Until now research has failed to show that food or diet can play any part in reducing the progression of osteoarthritis, so if these findings can be replicated in humans, it would be quite a breakthrough.", + "length": 207 + }, + { + "text": "The research, published in the 
journal Arthritis \u0026 Rheumatism, found that more than three-quarters of mice eating the sulforaphane-rich diet showed a reduction in cartilage damage compared to those on a control diet.", + "length": 216 + }, + { + "text": "‘We know that exercise and keeping to a healthy weight can improve people’s symptoms and reduce the chances of the disease progressing, but this adds another layer in our understanding of how diet could play its part.", + "length": 221 + }, + { + "text": "Alan Silman, Arthritis Research UK’s medical director, said: ‘This is an interesting study with promising results as it suggests that a common vegetable, broccoli, might have health benefits for people with osteoarthritis and even possibly protect people from developing the disease in the first place.", + "length": 306 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8035345077514648 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:10.135580562Z", + "first_section_created": "2025-12-23T09:33:10.135904076Z", + "last_section_published": "2025-12-23T09:33:10.136206389Z", + "all_results_received": "2025-12-23T09:33:10.201441741Z", + "output_generated": "2025-12-23T09:33:10.201611148Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:10.135904076Z", + "publish_time": "2025-12-23T09:33:10.136206389Z", + "first_worker_start": "2025-12-23T09:33:10.136656809Z", + "last_worker_end": "2025-12-23T09:33:10.200514Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:10.13667601Z", + "start_time": "2025-12-23T09:33:10.136751813Z", + "end_time": "2025-12-23T09:33:10.136859618Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + 
"receive_time": "2025-12-23T09:33:10.136938Z", + "start_time": "2025-12-23T09:33:10.137121Z", + "end_time": "2025-12-23T09:33:10.200514Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:10.136672109Z", + "start_time": "2025-12-23T09:33:10.136751713Z", + "end_time": "2025-12-23T09:33:10.136897019Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:10.136575805Z", + "start_time": "2025-12-23T09:33:10.136656809Z", + "end_time": "2025-12-23T09:33:10.136711011Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4247, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/0022944fc603b8c03c02f2e67a874b067db28196.json 
b/data/output/0022944fc603b8c03c02f2e67a874b067db28196.json new file mode 100644 index 0000000..f8bee67 --- /dev/null +++ b/data/output/0022944fc603b8c03c02f2e67a874b067db28196.json @@ -0,0 +1,412 @@ +{ + "file_name": "0022944fc603b8c03c02f2e67a874b067db28196.txt", + "total_words": 1026, + "top_n_words": [ + { + "word": "the", + "count": 80 + }, + { + "word": "of", + "count": 34 + }, + { + "word": "to", + "count": 31 + }, + { + "word": "a", + "count": 26 + }, + { + "word": "s", + "count": 26 + }, + { + "word": "in", + "count": 24 + }, + { + "word": "and", + "count": 23 + }, + { + "word": "syrian", + "count": 18 + }, + { + "word": "meeting", + "count": 15 + }, + { + "word": "that", + "count": 15 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "Syria.", + "length": 6 + }, + { + "text": "Like the U.", + "length": 11 + }, + { + "text": "Nuland said the U.", + "length": 18 + }, + { + "text": "However, a senior U.", + "length": 20 + }, + { + "text": "\"The bloodshed needs to stop.", + "length": 29 + }, + { + "text": "State Department official said U.", + "length": 33 + }, + { + "text": "Embassy in Damascus last Thursday.", + "length": 34 + }, + { + "text": "Citizens have called a general strike in the city.", + "length": 50 + }, + { + "text": "Embassy Friday and Saturday, the senior State Department official said.", + "length": 71 + }, + { + "text": "Sunday's dialogue meeting began as Syria's foreign ministry summoned the U.", + "length": 75 + }, + { + "text": "Demonstrators protested the meeting in nationwide \"no dialogue\" marches Friday.", + "length": 79 + }, + { + "text": "CNN's Elise Labott, Yousuf Basil and Salma Abdelaziz contributed to this report.", + "length": 80 + }, + { + "text": "Al-Shara 
acknowledged that a surge of violence in Syria precipitated Sunday's meeting.", + "length": 86 + }, + { + "text": "Several speakers at Sunday's meeting called on Syria's government to change its tactics.", + "length": 88 + }, + { + "text": "The official declined to speak on the record because of the sensitivity of the situation.", + "length": 89 + }, + { + "text": "Yes, there are unauthorized protests, but is it a reason to use unjustified and excessive violence?", + "length": 99 + }, + { + "text": "The Syrian government has claimed armed groups are responsible for the violence at the demonstrations.", + "length": 102 + }, + { + "text": "In the meeting, Ford said his visit to Hama was meant to gather information and support freedom of expression.", + "length": 110 + }, + { + "text": "Activists and Human Rights Watch have reported many arrests and deaths in a fierce government crackdown in the area.", + "length": 116 + }, + { + "text": "The ministry told the diplomats that their visit to the city of Hama violated the Vienna Convention, according to SANA.", + "length": 119 + }, + { + "text": "That 1961 accord, brokered through the United Nations, sets ground rules as to how diplomats can operate in other countries.", + "length": 124 + }, + { + "text": "The use of all types of excessive force is unjustified,\" said Qadri Jameel of the opposition Front of Change and Liberation.", + "length": 124 + }, + { + "text": "Embassy had notified the Syrian Defense Ministry before the visit and that Ford's car was waved through a security checkpoint.", + "length": 126 + }, + { + "text": "He also accused the Syrian government of inciting Syrians against the United States, including organizing a protest outside the U.", + "length": 130 + }, + { + "text": "Syrian researcher Al-Tayyeb Tizzina also criticized the use of force and asked for violence to stop in order for the dialogue to succeed.", + "length": 137 + }, + { + "text": "The state-run Syrian Arab News Agency said the meeting 
included members of the opposition, independent activists, youth leaders and academics.", + "length": 142 + }, + { + "text": "Diplomatic tensions over Syria also flared in Washington last week, with the State Department summoning Syrian Ambassador Imad Mustapha Friday.", + "length": 143 + }, + { + "text": "Ambassador Robert Ford was not summoned by Syrian officials on Sunday; his meeting with Syria's foreign minister was previously scheduled by the U.", + "length": 147 + }, + { + "text": "\"The establishment of a political society requires the immediate start of a process dismantling the police state that is dominating Syria,\" he said.", + "length": 148 + }, + { + "text": "Syria's vice president hailed the Damascus University meeting between officials and members of the opposition as a step toward creating a \"democratic nation.", + "length": 157 + }, + { + "text": "The State Department said Mustapha was called \"to express a number of our concerns with the reported actions of certain Syrian embassy staff in the United States.", + "length": 162 + }, + { + "text": "Syrian activists say that security personnel have assaulted unarmed protesters during months of anti-government demonstrations that erupted nationwide in mid-March.", + "length": 164 + }, + { + "text": "and French ambassadors and accused them of interfering in internal affairs when they visited Syria's fourth-largest city without permission last week, state media reported.", + "length": 172 + }, + { + "text": "ambassador, Chevallier visited Hama on Thursday and spent the night, the French government said, meeting with wounded people and their families and medical staffers at a hospital.", + "length": 179 + }, + { + "text": "However, some opponents of President Bashar al-Assad's regime have criticized the meeting, saying the government is trying to quiet widespread unrest without making meaningful changes.", + "length": 184 + }, + { + "text": "(CNN) -- Activists speaking at a Syrian government-sponsored 
\"national dialogue\" meeting Sunday criticized recent crackdowns by the country's security forces, calling for an end to violence against protesters.", + "length": 209 + }, + { + "text": "Al-Assad issued a decree appointing a Hama provincial governor Sunday, a day after firing the existing leader after a series of peaceful demonstrations there, including a massive anti-government protest last Friday.", + "length": 215 + }, + { + "text": "The French government summoned Syria's ambassador to France on Sunday to issue a formal protest on this matter, and to hold Syrian authorities responsible for the security of French diplomats in the Middle Eastern country.", + "length": 222 + }, + { + "text": "\"We have to admit that without the big sacrifices that were presented by the Syrian people, from the blood of their sons, civilians or military in more than one province, city and town, this meeting wouldn't have happened,\" he said.", + "length": 232 + }, + { + "text": "The French foreign ministry issued a statement Sunday saying that its embassy had also been besieged by demonstrators, faulting Syrian authorities for failing to stop the destruction of vehicles, burning of French flags and other damage.", + "length": 237 + }, + { + "text": "\"Any dialogue must be based on the base of (al-Assad's) stepping down from power,\" said a statement from the Change in Syria Conference, an opposition group that called for al-Assad to hand over power to the vice president at a meeting in Turkey last month.", + "length": 257 + }, + { + "text": "State Department spokeswoman Victoria Nuland issued a blunt rebuttal to similar Syrian government accusations Friday, calling claims that Ford's visit was inciting protesters \"absolute rubbish\" and saying she was \"dismayed\" by the Syrian government's reaction.", + "length": 260 + }, + { + "text": "Protesters threw tomatoes, eggs, glass and rocks at the embassy as they called for the ambassador to leave during the 31-hour demonstration, 
according to the senior State Department official, who asked to remain anonymous because of the sensitivity of the situation.", + "length": 266 + }, + { + "text": "\"We are also investigating reports that the Syrian government has sought retribution against Syrian family members for the actions of their relatives in the United States exercising their lawful rights in this country and will respond accordingly,\" the statement said.", + "length": 268 + }, + { + "text": "\" The statement, issued in response to a question taken at Friday's daily briefing, said the State Department had received reports that Syrian mission personnel had been conducting video surveillance of people participating in peaceful demonstrations in the United States.", + "length": 272 + }, + { + "text": "\" \"We hope that at the end of this comprehensive meeting to announce the transition of Syria to a pluralistic democratic nation where all citizens are guided by equality and participate in the modeling of the future of their country,\" Vice President Faruq al-Shara said in opening remarks at the meeting, which was broadcast live on state television.", + "length": 350 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6574950963258743 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:10.636959081Z", + "first_section_created": "2025-12-23T09:33:10.63830524Z", + "last_section_published": "2025-12-23T09:33:10.638671356Z", + "all_results_received": "2025-12-23T09:33:10.715952334Z", + "output_generated": "2025-12-23T09:33:10.716187344Z", + "total_processing_time_ms": 79, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 77, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:10.63830524Z", + "publish_time": "2025-12-23T09:33:10.638506248Z", + "first_worker_start": "2025-12-23T09:33:10.639034472Z", + "last_worker_end": "2025-12-23T09:33:10.712691Z", + 
"total_journey_time_ms": 74, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:10.639038672Z", + "start_time": "2025-12-23T09:33:10.639126576Z", + "end_time": "2025-12-23T09:33:10.63922068Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:10.640091Z", + "start_time": "2025-12-23T09:33:10.640222Z", + "end_time": "2025-12-23T09:33:10.712691Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:10.638972969Z", + "start_time": "2025-12-23T09:33:10.639034472Z", + "end_time": "2025-12-23T09:33:10.639130576Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:10.639015271Z", + "start_time": "2025-12-23T09:33:10.639124675Z", + "end_time": "2025-12-23T09:33:10.639174078Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:10.638559051Z", + "publish_time": "2025-12-23T09:33:10.638671356Z", + "first_worker_start": "2025-12-23T09:33:10.639164377Z", + "last_worker_end": "2025-12-23T09:33:10.712894Z", + "total_journey_time_ms": 74, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:10.639128876Z", + "start_time": "2025-12-23T09:33:10.639164377Z", + "end_time": "2025-12-23T09:33:10.639194979Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:10.639394Z", + "start_time": "2025-12-23T09:33:10.639528Z", + "end_time": "2025-12-23T09:33:10.712894Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 73 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:10.639121975Z", + "start_time": 
"2025-12-23T09:33:10.639177578Z", + "end_time": "2025-12-23T09:33:10.63921888Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:10.639155877Z", + "start_time": "2025-12-23T09:33:10.639196579Z", + "end_time": "2025-12-23T09:33:10.639209979Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 145, + "min_processing_ms": 72, + "max_processing_ms": 73, + "avg_processing_ms": 72, + "median_processing_ms": 73, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3253, + "slowest_section_id": 0, + "slowest_section_time_ms": 74 + } +} diff --git a/data/output/0022a8a7afe47ac14e7ed51c38f629c866661e38.json b/data/output/0022a8a7afe47ac14e7ed51c38f629c866661e38.json new file mode 100644 index 0000000..6435f2e --- /dev/null +++ b/data/output/0022a8a7afe47ac14e7ed51c38f629c866661e38.json @@ -0,0 +1,412 @@ +{ + "file_name": "0022a8a7afe47ac14e7ed51c38f629c866661e38.txt", + "total_words": 950, + "top_n_words": [ + { + "word": 
"the", + "count": 51 + }, + { + "word": "a", + "count": 25 + }, + { + "word": "to", + "count": 23 + }, + { + "word": "and", + "count": 21 + }, + { + "word": "he", + "count": 19 + }, + { + "word": "her", + "count": 18 + }, + { + "word": "i", + "count": 18 + }, + { + "word": "lowe", + "count": 18 + }, + { + "word": "had", + "count": 15 + }, + { + "word": "in", + "count": 15 + } + ], + "sorted_sentences": [ + { + "text": "Bye.", + "length": 4 + }, + { + "text": "Bye.", + "length": 4 + }, + { + "text": "John Lowe.", + "length": 10 + }, + { + "text": "He shot my mum.", + "length": 15 + }, + { + "text": "He shot my mum.", + "length": 15 + }, + { + "text": "I'm running for my life.", + "length": 24 + }, + { + "text": "' The call was then ended.", + "length": 26 + }, + { + "text": "”’ The trial continues.", + "length": 27 + }, + { + "text": "44am on February 23 this year.", + "length": 30 + }, + { + "text": "Police at the scene in February.", + "length": 32 + }, + { + "text": "Get Rafferty [a local policeman].", + "length": 33 + }, + { + "text": "Landcruiser for £5,500 and they’ve spent the lot.", + "length": 52 + }, + { + "text": "' She then said: 'I'm gonna go back for him but I'll die.", + "length": 57 + }, + { + "text": "Lucy was shot in the back of her head from around 9ft away.", + "length": 59 + }, + { + "text": "’ He also said: ‘They’ve been giving me s*** for weeks.", + "length": 61 + }, + { + "text": "'I don't know if I'm going to be alive if I go back in there.", + "length": 61 + }, + { + "text": "Sorry we are not currently accepting comments on this article.", + "length": 62 + }, + { + "text": "'I'm nearly outside the house and I fear he's going to shoot me.", + "length": 64 + }, + { + "text": "After his arrest he is alleged to have told police: ‘I shot them both.", + "length": 72 + }, + { + "text": "Lowe, a dog breeder, was said to have shown no remorse for killing them.", + "length": 72 + }, + { + "text": "She had moved into his house to care for him after he 
fell on hard times.", + "length": 73 + }, + { + "text": "Lowe then had to reload before firing a second fatal shot into her chest.", + "length": 73 + }, + { + "text": "Lowe is accused of shooting his partner and her daughter in a fit of rage .", + "length": 75 + }, + { + "text": "The court was played the harrowing 999 call from Lucy Lee as she 'ran for her life.", + "length": 83 + }, + { + "text": "' Lucy repeatedly told the operator the address of the property, adding: 'He shot my mum.", + "length": 89 + }, + { + "text": "' Breathing heavily and shouting, Lucy told the emergency operator: 'My mother has been shot.", + "length": 93 + }, + { + "text": "Prosecutor Mark Dennis QC told the jury about a ‘desperate [999] call’ made by Lucy at 9.", + "length": 93 + }, + { + "text": "John Lowe, left denies the murder of wife Christine, pictured with step daughter Stacey Banner .", + "length": 96 + }, + { + "text": "Armed police found Mrs Lee and her daughter dead at Lowe's farm after they surrounded the property .", + "length": 100 + }, + { + "text": "The court heard officers found a 'scene of carnage' at the rural site, with four dogs also shot dead .", + "length": 102 + }, + { + "text": "But Mr Dennis said that the defendant had complained about the mother and daughter interfering in his affairs.", + "length": 110 + }, + { + "text": "Lowe, who had a valid shotgun licence at the time, had used the same weapon to kill the pair and the four dogs.", + "length": 111 + }, + { + "text": "” ‘Her last words to the operator were, “I don’t know if I’m going to be alive if I go back in there.", + "length": 111 + }, + { + "text": "Christine Lee had been living with Lowe and providing him with care and assistance after he fell on hard times .", + "length": 112 + }, + { + "text": "Lowe’s long-term partner Susanna Wilson had died in 2013 and he had lost his licence to breed dogs at the farm.", + "length": 113 + }, + { + "text": "Mr Dennis said that Christine had known Lowe for many 
years and had occasionally been in a relationship with him.", + "length": 113 + }, + { + "text": "A post-mortem examination confirmed Christine was killed by a single gunshot fired into her chest from about 1ft away.", + "length": 118 + }, + { + "text": "Mr Dennis said evidence suggested that he shot Christine as she was in a ‘cowering position’ as he stood over her.", + "length": 118 + }, + { + "text": "’ The pensioner denies two charges of murder and a third charge of possessing a firearm with intent to endanger life.", + "length": 119 + }, + { + "text": "Killed: Lucy Lee, 40, made a desperate 999 call saying she was ‘running for her life’ moments before she was shot by .", + "length": 122 + }, + { + "text": "’ The jury heard that when police arrived they found Lucy lifeless on the garden steps and Christine dead in the living room.", + "length": 127 + }, + { + "text": "‘The defendant, in effect, ended her life in the same heartless way as he used to dispose of the lives of the four dogs,’ he said.", + "length": 134 + }, + { + "text": "Armed police later surrounded Lowe’s farm but found the mother and daughter both dead amid ‘a scene of carnage’, the court heard.", + "length": 135 + }, + { + "text": "Lucy Lee, 40, told the emergency operator ‘I fear he’s going to shoot me’ after John Lowe, 82, allegedly shot her mother Christine.", + "length": 137 + }, + { + "text": "” Those were indeed her last words to anyone save possibly to John Lowe, who moments later was to kill her with two shots from a shotgun.", + "length": 139 + }, + { + "text": "The court heard that the former gamekeeper had a love-hate relationship with the pair, who had been looking after him in the months before their deaths.", + "length": 152 + }, + { + "text": "A woman made a desperate 999 call saying she was ‘running for her life’ moments before she was shot dead by her mother’s ex-boyfriend, a court heard yesterday.", + "length": 165 + }, + { + "text": "Guildford Crown Court heard that he shot 
his 66-year-old former partner and her daughter at close range before killing four dogs at his farm in Tilford near Farnham, Surrey.", + "length": 173 + }, + { + "text": "‘The female caller was in a frightened and frantic state, saying that a man called John Lowe had just shot her mother and that she was herself running for her life,’ he said.", + "length": 178 + }, + { + "text": "A few weeks before the killings, Lowe told a mechanic who visited the property to repair a vehicle: ‘Those two between them drive me ******* mad, they want to know about the ins and out of everything.", + "length": 202 + }, + { + "text": "John Lowe told police after his arrest that the two women had been 'giving me s*** for weeks' Lowe, pictured arriving at court, told police the shotgun went off by accident after he argued with Mrs Lee .", + "length": 203 + }, + { + "text": "’ PC Chris Gleeson, who arrested Lowe, said: ‘He said she [Christine] was an ex-girlfriend and added, “She’s been supposed to be looking after me but she’s been starving me to death and she’s been keeping me short of money.", + "length": 235 + }, + { + "text": "‘Moments later, showing extraordinary courage, the caller indicated that she had made the decision to go back to the scene of the incident, saying as she ran, “I’m gonna go back for him but I’ll die,” followed by, “I’m nearly outside the house and I fear he’s going to shoot me.", + "length": 294 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5464192032814026 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:11.139313243Z", + "first_section_created": "2025-12-23T09:33:11.141202425Z", + "last_section_published": "2025-12-23T09:33:11.141677846Z", + "all_results_received": "2025-12-23T09:33:11.219137532Z", + "output_generated": "2025-12-23T09:33:11.219296039Z", + "total_processing_time_ms": 79, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 77, + "aggregation_wait_time_ms": 0, + 
"section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:11.141202425Z", + "publish_time": "2025-12-23T09:33:11.141510439Z", + "first_worker_start": "2025-12-23T09:33:11.141850453Z", + "last_worker_end": "2025-12-23T09:33:11.218272Z", + "total_journey_time_ms": 77, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:11.141915856Z", + "start_time": "2025-12-23T09:33:11.142029461Z", + "end_time": "2025-12-23T09:33:11.142121865Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:11.142233Z", + "start_time": "2025-12-23T09:33:11.142388Z", + "end_time": "2025-12-23T09:33:11.218272Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 75 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:11.141757449Z", + "start_time": "2025-12-23T09:33:11.141850453Z", + "end_time": "2025-12-23T09:33:11.141989059Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:11.141820452Z", + "start_time": "2025-12-23T09:33:11.141917856Z", + "end_time": "2025-12-23T09:33:11.14199336Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:11.141560641Z", + "publish_time": "2025-12-23T09:33:11.141677846Z", + "first_worker_start": "2025-12-23T09:33:11.142092364Z", + "last_worker_end": "2025-12-23T09:33:11.182486Z", + "total_journey_time_ms": 40, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:11.142107465Z", + "start_time": "2025-12-23T09:33:11.142130266Z", + "end_time": "2025-12-23T09:33:11.142138166Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:11.142318Z", + "start_time": 
"2025-12-23T09:33:11.142435Z", + "end_time": "2025-12-23T09:33:11.182486Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 40 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:11.142080463Z", + "start_time": "2025-12-23T09:33:11.142153067Z", + "end_time": "2025-12-23T09:33:11.142161467Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:11.142067963Z", + "start_time": "2025-12-23T09:33:11.142092364Z", + "end_time": "2025-12-23T09:33:11.142096064Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 115, + "min_processing_ms": 40, + "max_processing_ms": 75, + "avg_processing_ms": 57, + "median_processing_ms": 75, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2570, + "slowest_section_id": 0, + "slowest_section_time_ms": 77 + } +} diff --git a/data/output/0022b77434a498d0bf38836caa3ec8c071f624f6.json b/data/output/0022b77434a498d0bf38836caa3ec8c071f624f6.json new file mode 100644 
index 0000000..b94f329 --- /dev/null +++ b/data/output/0022b77434a498d0bf38836caa3ec8c071f624f6.json @@ -0,0 +1,620 @@ +{ + "file_name": "0022b77434a498d0bf38836caa3ec8c071f624f6.txt", + "total_words": 1308, + "top_n_words": [ + { + "word": "the", + "count": 92 + }, + { + "word": "to", + "count": 46 + }, + { + "word": "of", + "count": 34 + }, + { + "word": "a", + "count": 26 + }, + { + "word": "in", + "count": 25 + }, + { + "word": "and", + "count": 21 + }, + { + "word": "were", + "count": 19 + }, + { + "word": "said", + "count": 14 + }, + { + "word": "they", + "count": 14 + }, + { + "word": "three", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "At .", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Ms .", + "length": 4 + }, + { + "text": "'We .", + "length": 5 + }, + { + "text": "'So .", + "length": 5 + }, + { + "text": "'If .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "When .", + "length": 6 + }, + { + "text": "They .", + "length": 6 + }, + { + "text": "' The .", + "length": 7 + }, + { + "text": "Custer .", + "length": 8 + }, + { + "text": "Tuesday.", + "length": 8 + }, + { + "text": "the cars.", + "length": 9 + }, + { + "text": "20, 1970.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "skeletons.", + "length": 10 + }, + { + "text": "no nothing.", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Authorities .", + "length": 13 + }, + { + "text": "never gave up.", + "length": 14 + }, + { + "text": "skull, she said.", + "length": 16 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "There were no leads, .", + "length": 22 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "closure to those families.", + "length": 26 + }, + { + "text": "The two cars are seen above.", + "length": 28 + }, + { + "text": "She told Oklahoma's 
Newsnine.", + "length": 29 + }, + { + "text": "09:43 EST, 18 September 2013 .", + "length": 30 + }, + { + "text": "'We lived in a little town ...", + "length": 30 + }, + { + "text": "20:16 EST, 17 September 2013 .", + "length": 30 + }, + { + "text": "Jimmy Allen Williams 16, Thomas .", + "length": 33 + }, + { + "text": "In addition to the Custer County .", + "length": 34 + }, + { + "text": "detoured to go hunting at Foss Lake.", + "length": 36 + }, + { + "text": "The three teenagers never returned home.", + "length": 40 + }, + { + "text": "wondering where a missing loved one was.", + "length": 40 + }, + { + "text": "'It's just been under water for 40 years.", + "length": 41 + }, + { + "text": "Nothing like that ever happened in Sayre.", + "length": 41 + }, + { + "text": "He died in 2003 never knowing what happened.", + "length": 44 + }, + { + "text": "Carmichael added: 'He said there was nothing ...", + "length": 48 + }, + { + "text": "com: 'I just remember how devastated everybody was .", + "length": 52 + }, + { + "text": "' Oklahoma Highway Patrol said they are hoping the .", + "length": 52 + }, + { + "text": "that came out they found bones in the car,' she said.", + "length": 53 + }, + { + "text": "He said it was just like they vanished into thin air.", + "length": 53 + }, + { + "text": "to the scene, many of whom said that they never lost hope.", + "length": 58 + }, + { + "text": "When they pulled the cars out of the water, the first one .", + "length": 59 + }, + { + "text": "who have been waiting to hear about missing people,' she said.", + "length": 62 + }, + { + "text": "training with sonar when they came upon the vehicles last week.", + "length": 63 + }, + { + "text": "the time of the disappearance, Ms Carmichael's father was the .", + "length": 63 + }, + { + "text": "headed to a football game in nearby Elk City but also could have .", + "length": 66 + }, + { + "text": "the Camaro are those of the three teens who went missing in 1970 .", + "length": 66 + }, 
+ { + "text": "County Sheriff Bruce Peoples said he believes the bodies found in .", + "length": 67 + }, + { + "text": "expected to use DNA from surviving family members to identify the .", + "length": 67 + }, + { + "text": "remains were turned over to the medical examiner's office who are .", + "length": 67 + }, + { + "text": "are still listed as missing persons and were thought to have been .", + "length": 67 + }, + { + "text": "Drudging up clues: There were five skeletons between the two cars .", + "length": 67 + }, + { + "text": "they pulled the second car out, another set of bones was discovered.", + "length": 68 + }, + { + "text": "discovered the accidentally as Betsy Randolph, spokeswoman for the .", + "length": 68 + }, + { + "text": "Michael Rios, 18, and Leah Gail Johnson, 18, all went missing after .", + "length": 69 + }, + { + "text": "undersheriff in nearby Beckham County where the teens were last seen.", + "length": 69 + }, + { + "text": "going for a drive in Jimmy’s blue 1969 Camaro on November 20, 1970.", + "length": 69 + }, + { + "text": "Spokesman Betsy Randolph added: 'We're hoping these individuals, that .", + "length": 71 + }, + { + "text": "They had lain undiscovered under an Oklahoma lake for over forty years.", + "length": 71 + }, + { + "text": "medical examiner had called a number of relatives of possible victims .", + "length": 71 + }, + { + "text": "Oklahoma Highway Patrol, said dive teams were at Foss Lake conducting .", + "length": 71 + }, + { + "text": "this is going to bring some sort of closure to some families out there .", + "length": 72 + }, + { + "text": "discovery will offer some relief to families who may have gone decades .", + "length": 72 + }, + { + "text": "Sheriff's Department, the Oklahoma Highway Patrol, the Oklahoma Bureau .", + "length": 72 + }, + { + "text": "The divers then went back in the water and searched around and found a .", + "length": 72 + }, + { + "text": "It's a mucky mess,' Custer County Sheriff Bruce 
Peoples told KWEY radio.", + "length": 72 + }, + { + "text": "of Investigation, and the state medical examiner's office were on scene .", + "length": 73 + }, + { + "text": "they went back and did a scheduled dive today and were going to recover .", + "length": 73 + }, + { + "text": "that's the case, then we're thrilled we were able to bring some sort of .", + "length": 73 + }, + { + "text": "Scene: The cars weer found in Foss Lake near Elk City in western Oklahoma .", + "length": 75 + }, + { + "text": "Missing: Jimmy Williams poses next to his then brand-new Camaro as a 16-year-old in 1970 .", + "length": 90 + }, + { + "text": "' Local woman Kim Carmichael was a friend of the Camaro's owner, 16-year-old Jimmy Williams.", + "length": 92 + }, + { + "text": "According to one of his friends called Wayne, the three friends were actually going on a shooting trip.", + "length": 103 + }, + { + "text": "'I can't imagine what [Williams'] family was going through if I could see what my dad was going through.", + "length": 104 + }, + { + "text": "He said: 'The decomposed nature of the cars makes it difficult to positively identify here at the scene.", + "length": 104 + }, + { + "text": "Three bodies believed to be those of local teenagers who disappeared in 1970 were discovered inside the Camaro .", + "length": 112 + }, + { + "text": "They say they have confirmed the identity of at least one of the victims however details are yet to be released.", + "length": 112 + }, + { + "text": "We always wanted some clue that somebody knew someone,' said Debbie McManaman, a possible victim's granddaughter.", + "length": 113 + }, + { + "text": "Jimmy Allen Williams, 16, and his friends Thomas Michael Rios, 18, and Leah Gail Johnson, 18, disappeared on On Nov.", + "length": 116 + }, + { + "text": "Jimmy, who had a part time job at a grocery store, had bought himself a brand new Camaro muscle car six days earlier.", + "length": 117 + }, + { + "text": "'We thought it was just going to be 
stolen vehicles and that's not what it turned out to be, obviously,' Randolph said.", + "length": 119 + }, + { + "text": "Inside the Camaro were three bodies believed to be of local teenagers who vanished after going out for a drive in 1970.", + "length": 119 + }, + { + "text": "Police say the Camaro matches the vehicle associated with the three missing teenagers who disappeared on November 10 1970.", + "length": 122 + }, + { + "text": "Evidence: An investigator takes a picture of a shoe at the crime scene where six bodies and two cars were pulled from the lake .", + "length": 128 + }, + { + "text": "Chance find: The two cars were discovered by Highway Patrolmen testing new sonar equipment at Foss Lake near Elk City, Oklahoma .", + "length": 129 + }, + { + "text": "Rusted: Police divers were sent down to investigate the cars before they were pulled out of the lake and the bones were discovered .", + "length": 132 + }, + { + "text": "Wayne said he was had planned to join them but changed his mind at the last minute because there was not enough room for him in the car.", + "length": 136 + }, + { + "text": "Other victims: Local news reports claim that the three people inside the Chevrolet were a 69-year-old man from Elk City and his two friends .", + "length": 141 + }, + { + "text": "Investigation: Custer County Sheriff Bruce Peoples said the decomposed nature of the cars made it difficult to positively identify them at the scene .", + "length": 150 + }, + { + "text": "The other car - a 1950 model Chevrolet - contained three bodies believed to be those of Washita County residents who went missing in the early 1960s .", + "length": 150 + }, + { + "text": "A 1950s Chevrolet similar to the older car which was believed to contain the remains of three people from Washita County, who went missing in the early 1960s .", + "length": 159 + }, + { + "text": "The bodies discovered in the Camaro are believed to be those of (L-R) Jimmy Williams, 16,  Leah Gail Johnson and Thomas 
Michael Rios who disappeared in 1970 .", + "length": 159 + }, + { + "text": "Authorities have not formally identified all of the remains belong to but the local paper has made a clear connection between the discovered Camaro and the teens.", + "length": 162 + }, + { + "text": "Shock discovery: It happened in Custer County, Oklahoma, when officers went to Foss Lake to take out a new sonar detection system, just to see how well it worked .", + "length": 163 + }, + { + "text": "Inside the Chevrolet were three more bodies – thought to be a 69-year-old man and his two friends who went missing in the state in the late 1950s or early 1960s.", + "length": 163 + }, + { + "text": "The remains found inside are to be turned over to the medical examiner's office who are expected to use DNA from surviving family members to identify the skeletons .", + "length": 165 + }, + { + "text": "Highway patrol officers testing their sonar equipment Foss Lake near Elk City, Oklahoma on Friday stumbled upon the rusting 1969 Camaro and a Chevrolet dating back to the 1950s.", + "length": 177 + }, + { + "text": "And now the discovery of two rusting vintage cars containing six bodies has reignited the mystery of how three teenagers and three other people vanished more than four decades ago.", + "length": 180 + }, + { + "text": "Baffled: The discovery of two rusting vintage cars containing six bodies has reignited the mystery of how three teenagers and three other people vanished more than four decades ago .", + "length": 182 + }, + { + "text": "He told his parents he was going to a football game, but according to reports at the time, he backed up his car to the back door of the home and loaded several shotguns into the trunk with the help of one of his younger brothers.", + "length": 229 + }, + { + "text": "The latest reports by local station KFOR states that one of the victims in the car thought to have belonged to the teenager has been identified but they are waiting to notify all of the 
relatives of the victims involved before releasing any names.", + "length": 247 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4861864596605301 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:11.64247734Z", + "first_section_created": "2025-12-23T09:33:11.64478354Z", + "last_section_published": "2025-12-23T09:33:11.645254561Z", + "all_results_received": "2025-12-23T09:33:11.731945251Z", + "output_generated": "2025-12-23T09:33:11.732171961Z", + "total_processing_time_ms": 89, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 86, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:11.64478354Z", + "publish_time": "2025-12-23T09:33:11.645115355Z", + "first_worker_start": "2025-12-23T09:33:11.645608577Z", + "last_worker_end": "2025-12-23T09:33:11.731044Z", + "total_journey_time_ms": 86, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:11.645701081Z", + "start_time": "2025-12-23T09:33:11.645768284Z", + "end_time": "2025-12-23T09:33:11.645849687Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:11.64585Z", + "start_time": "2025-12-23T09:33:11.646065Z", + "end_time": "2025-12-23T09:33:11.731044Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 84 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:11.645623977Z", + "start_time": "2025-12-23T09:33:11.64568018Z", + "end_time": "2025-12-23T09:33:11.645769684Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:11.645516172Z", + "start_time": "2025-12-23T09:33:11.645608577Z", + "end_time": "2025-12-23T09:33:11.645721881Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + 
"creation_time": "2025-12-23T09:33:11.645159657Z", + "publish_time": "2025-12-23T09:33:11.645254561Z", + "first_worker_start": "2025-12-23T09:33:11.645662479Z", + "last_worker_end": "2025-12-23T09:33:11.719131Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:11.645733682Z", + "start_time": "2025-12-23T09:33:11.645784084Z", + "end_time": "2025-12-23T09:33:11.645859988Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:11.645941Z", + "start_time": "2025-12-23T09:33:11.646065Z", + "end_time": "2025-12-23T09:33:11.719131Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 73 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:11.645699681Z", + "start_time": "2025-12-23T09:33:11.645737882Z", + "end_time": "2025-12-23T09:33:11.645799585Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:11.645616977Z", + "start_time": "2025-12-23T09:33:11.645662479Z", + "end_time": "2025-12-23T09:33:11.645707481Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 157, + "min_processing_ms": 73, + "max_processing_ms": 84, + "avg_processing_ms": 78, + "median_processing_ms": 84, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3753, + "slowest_section_id": 0, + "slowest_section_time_ms": 86 + } +} diff --git a/data/output/0022bf4c213508038fa0f031484797186f18580b.json b/data/output/0022bf4c213508038fa0f031484797186f18580b.json new file mode 100644 index 0000000..24741d7 --- /dev/null +++ b/data/output/0022bf4c213508038fa0f031484797186f18580b.json @@ -0,0 +1,282 @@ +{ + "file_name": "0022bf4c213508038fa0f031484797186f18580b.txt", + "total_words": 731, + "top_n_words": [ + { + "word": "the", + "count": 39 + }, + { + "word": "of", + "count": 26 + }, + { + "word": "to", + "count": 22 + }, + { + "word": "nurses", + "count": 18 + }, + { + "word": "from", + "count": 15 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "is", + "count": 13 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "eu", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "But most do not.", + "length": 16 + }, + { + "text": "This is a major step backwards for patient safety.", + "length": 50 + }, + { + "text": "It’s as if the Mid Staffs scandal passed the NMC by.", + "length": 54 + }, + { + "text": "They are just doing whatever is expedient to fill posts.", + "length": 56 + }, + { + "text": "We ought not to have this reliance on workers coming here from overseas.", + "length": 72 + }, + { + "text": "He added: ‘It seems we just aren’t training enough of our own nurses.", + "length": 73 + }, + { + "text": "’ On the relaxing of rules for nurses from non-EU nations, he said: ‘This is ludicrous.", + "length": 91 + }, + { + "text": "Roger Goss, 
from Patient Concern, said the increase in EU nurses was ‘really alarming’.", + "length": 91 + }, + { + "text": "It would ensure that they were ‘able to practise safely and effectively in the UK’, she added.", + "length": 98 + }, + { + "text": "There are now 20,914 registered to practise in hospitals and care homes, compared with 10,244 in 2010.", + "length": 102 + }, + { + "text": "Over the past year many hospitals have been desperately recruiting nurses from abroad to fill understaffed wards.", + "length": 113 + }, + { + "text": "‘However good a nurse is, if they have trained abroad they may well not be familiar with the systems here and the drugs we use.", + "length": 129 + }, + { + "text": "’ Jackie Smith, chief executive of the NMC, insisted the new test for non-EU nurses was ‘internationally recognised’ and ‘rigorous’.", + "length": 142 + }, + { + "text": "The watchdog insisted the test would be rigorous, but patient groups warned that it was a major step backwards that would put patients at risk.", + "length": 143 + }, + { + "text": "’ One senior NHS official, who works in the North of England, said there were concerns that the lack of checks would ‘put patients at risk’.", + "length": 146 + }, + { + "text": "The poll also revealed that 54 per cent of Health Service bosses planned to recruit nurses from overseas, mainly Spain, Portugal, Greece and Italy.", + "length": 147 + }, + { + "text": "A survey in June of more than 100 senior NHS managers found that 52 per cent did not normally use aptitude tests when recruiting nurses from the Continent.", + "length": 155 + }, + { + "text": "The NMC is replacing the course with a single test – half of which includes an online, multiple-choice quiz – to enable hospitals to recruit more quickly.", + "length": 158 + }, + { + "text": "‘Again and again we have seen concerns about the communication skills of these workers, and communication is absolutely critical when it comes to healthcare.", + "length": 159 + }, + { + 
"text": "Latest figures from the NMC show that in 2013-14, some 5,388 nurses from the EU joined its register, up from 3,436 the previous year – and the 2,715 who joined in 2011.", + "length": 170 + }, + { + "text": "As it is unable to carry out its own tests on EU nurses, the NMC has urged hospitals, care homes and GP surgeries to carry out checks on individuals before employing them.", + "length": 171 + }, + { + "text": "The number of nurses from the European Union allowed to work in Britain has doubled in just four years, raising fears of patients being put at risk due to language issues.", + "length": 171 + }, + { + "text": "NHS Trusts have been desperately recruiting nurses from overseas in the wake of the Mid Staffordshire scandal, when hospitals were criticised for not having enough nurses .", + "length": 172 + }, + { + "text": "Separately, the Nursing and Midwifery Council, the regulator of standards, last night announced it was relaxing the rules on nurses from outside the EU coming to work in Britain.", + "length": 178 + }, + { + "text": "This was partly fuelled by a report into the Mid Staffordshire hospital scandal, in which hundreds of patients died due to poor care, that warned many hospitals had too few nurses.", + "length": 180 + }, + { + "text": "Patients' groups are warning that an influx of nurses from Europe is threatening patient safety because EU rules do not allow hospitals to test their grasp of English before hiring them .", + "length": 187 + }, + { + "text": "Leading doctors have warned that the poor communication skills of some EU nurses is putting patients in danger, and say their skills are not up to the same standard of British-trained colleagues.", + "length": 195 + }, + { + "text": "At present non-EU nurses must prove their English is of a high enough standard before going on a course of up to 12 months at university which includes supervised work on a ward or in a care home.", + "length": 196 + }, + { + "text": "Speaking on condition 
of anonymity, he told the Mirror newspaper: ‘Without a period of supervision, there is absolutely no way you can tell whether they are actually capable of delivering safe care.", + "length": 200 + }, + { + "text": "’ Janet Davies, an executive director at the Royal College of Nursing, said: ‘Too often, nurses are recruited from overseas to fill short-term gaps and given inadequate support to care for patients well.", + "length": 207 + }, + { + "text": "But EU laws mean the nursing watchdog is banned from testing their command of the English language or their medical competence before giving them a licence to work because it is deemed to threaten ‘freedom of movement’.", + "length": 223 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.9077115654945374 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:12.146032254Z", + "first_section_created": "2025-12-23T09:33:12.146336667Z", + "last_section_published": "2025-12-23T09:33:12.146606079Z", + "all_results_received": "2025-12-23T09:33:12.207299232Z", + "output_generated": "2025-12-23T09:33:12.207817955Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:12.146336667Z", + "publish_time": "2025-12-23T09:33:12.146606079Z", + "first_worker_start": "2025-12-23T09:33:12.147329211Z", + "last_worker_end": "2025-12-23T09:33:12.206367Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:12.147277608Z", + "start_time": "2025-12-23T09:33:12.147366512Z", + "end_time": "2025-12-23T09:33:12.147440715Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:12.147576Z", + "start_time": "2025-12-23T09:33:12.147733Z", + "end_time": 
"2025-12-23T09:33:12.206367Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:12.147253407Z", + "start_time": "2025-12-23T09:33:12.147329211Z", + "end_time": "2025-12-23T09:33:12.147457616Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:12.147337111Z", + "start_time": "2025-12-23T09:33:12.147418114Z", + "end_time": "2025-12-23T09:33:12.147477617Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4271, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/002329f9f6f8b8aea77e774f0fe4143302617bd0.json b/data/output/002329f9f6f8b8aea77e774f0fe4143302617bd0.json new file mode 100644 index 0000000..7f318a9 --- /dev/null +++ 
b/data/output/002329f9f6f8b8aea77e774f0fe4143302617bd0.json @@ -0,0 +1,238 @@ +{ + "file_name": "002329f9f6f8b8aea77e774f0fe4143302617bd0.txt", + "total_words": 445, + "top_n_words": [ + { + "word": "to", + "count": 23 + }, + { + "word": "the", + "count": 18 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "brown", + "count": 9 + }, + { + "word": "women", + "count": 8 + }, + { + "word": "for", + "count": 7 + }, + { + "word": "have", + "count": 7 + }, + { + "word": "in", + "count": 7 + }, + { + "word": "they", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "8 Minutes will air on A\u0026E.", + "length": 26 + }, + { + "text": "We read that and thought somebody should put a camera on this.", + "length": 62 + }, + { + "text": "It launched a drive within his church to run these undercover operations.", + "length": 73 + }, + { + "text": "Changed man: Brown, who has spent years trying to save and help women, used to be a vice cop .", + "length": 94 + }, + { + "text": "' Way out: Even the women who refuse his help get a number to call should they change their mind .", + "length": 98 + }, + { + "text": "The show will follow these interactions, as well as life after for women that have chosen to leave.", + "length": 99 + }, + { + "text": "And though the show is not set to air until next year, Brown has been doing this work for years now.", + "length": 100 + }, + { + "text": "If they do not accept his help, Brown does still leave the women with a phone number to contact him later.", + "length": 106 + }, + { + "text": "'Women have burst into tears and said, “I’ve been waiting for someone to offer me a path out,”' said Forman.", + "length": 114 + }, + { + "text": "'And you have women who say, “I’m doing what I’m doing and I’m not doing anything else,” and they decline.", + "length": 116 + }, + { + "text": "A former vice cop who now works as a full-time pastor 
will star in a new reality show that aims at getting women off the street.", + "length": 128 + }, + { + "text": "'Brown told his congregation that for 20 years he’s had to arrest these women when what he’s really wanted to do is help them.", + "length": 130 + }, + { + "text": "' Brown works to target woman he feels have been forced into the business, something he is good at after all his years working in vice.", + "length": 135 + }, + { + "text": "'This is one of those great shows that was actually happening whether anybody was shooting it or not,' executive producer Tom Forman told EW.", + "length": 141 + }, + { + "text": "He then meets them in a room pretending to be a client, and offers them the chance to quit, providing the assistance they would need to leave the lifestyle.", + "length": 156 + }, + { + "text": "8 Minutes will center around Pastor Kevin Brown and his work trying to get prostitutes and escorts in the Los Angeles area to give up their work and go straight.", + "length": 161 + }, + { + "text": "Saved: Pastor Kevin Brown will star in the new reality show 8 Minutes in which he will have eight minutes to try and convince a prostitute to give up the lifestyle and go straight .", + "length": 181 + }, + { + "text": "In addition to Brown, the show will also have safety lookouts to make sure nothing turns violent and former prostitutes who the Pastor previously saved from the lifestyle to talk to the girls.", + "length": 192 + }, + { + "text": "As for the eight minutes, that is a safety protocol for the girls, this way if they decline they can go back to their pimps, should they have one, and just claim Brown was a cop without causing any problems.", + "length": 207 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.43578484654426575 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:12.647399772Z", + "first_section_created": "2025-12-23T09:33:12.647674784Z", + "last_section_published": 
"2025-12-23T09:33:12.647868293Z", + "all_results_received": "2025-12-23T09:33:12.708755055Z", + "output_generated": "2025-12-23T09:33:12.708945763Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:12.647674784Z", + "publish_time": "2025-12-23T09:33:12.647868293Z", + "first_worker_start": "2025-12-23T09:33:12.648371715Z", + "last_worker_end": "2025-12-23T09:33:12.707863Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:12.648380415Z", + "start_time": "2025-12-23T09:33:12.648456019Z", + "end_time": "2025-12-23T09:33:12.64849792Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:12.648604Z", + "start_time": "2025-12-23T09:33:12.648753Z", + "end_time": "2025-12-23T09:33:12.707863Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:12.648313412Z", + "start_time": "2025-12-23T09:33:12.648371715Z", + "end_time": "2025-12-23T09:33:12.648438918Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:12.648316112Z", + "start_time": "2025-12-23T09:33:12.648391016Z", + "end_time": "2025-12-23T09:33:12.648411917Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + 
"total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2359, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/0023360917e0780d0802a8c31b35001877ca2ae2.json b/data/output/0023360917e0780d0802a8c31b35001877ca2ae2.json new file mode 100644 index 0000000..93b7743 --- /dev/null +++ b/data/output/0023360917e0780d0802a8c31b35001877ca2ae2.json @@ -0,0 +1,282 @@ +{ + "file_name": "0023360917e0780d0802a8c31b35001877ca2ae2.txt", + "total_words": 603, + "top_n_words": [ + { + "word": "the", + "count": 26 + }, + { + "word": "and", + "count": 21 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "miss", + "count": 9 + }, + { + "word": "course", + "count": 8 + }, + { + "word": "s", + "count": 7 + }, + { + "word": "as", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Sophie Jane Evans .", + "length": 19 + }, + { + "text": "14:43 EST, 4 November 2013 .", + "length": 28 + }, + { + "text": "18:46 EST, 4 November 2013 .", + "length": 28 + 
}, + { + "text": "' She added: 'I will definitely be recommending Tough Guy to Chris and his team-mates.", + "length": 86 + }, + { + "text": "They were also joined by Chantelle Tagoe, the fiance of ex-England striker Emile Heskey.", + "length": 88 + }, + { + "text": "'We had heard that Tough Guy is the hardest obstacle challenge out there both physically and mentally.", + "length": 102 + }, + { + "text": "All cleaned up: Sam Cooke, who founded Limitless Skies, takes a stroll with her Premier League boyfriend Chris Smalling .", + "length": 121 + }, + { + "text": "Perfect pairings: Kaya Hall is pictured with United and England star Jones, left, and Dani Emery with young keeper Amos, right .", + "length": 128 + }, + { + "text": "Getting muddy: Miss Williams and Miss Emery get muddy in a river during the challenge, which was set over 200 acres of farmland .", + "length": 129 + }, + { + "text": "' Every year, around 5,000 people take part in a race around the assault course - however, a third never even make the finishing line.", + "length": 134 + }, + { + "text": "But these WAGs have manned up and taken part in a gruelling assault course set over 200 acres of farmland in Perton, South Staffordshire.", + "length": 137 + }, + { + "text": "They are usually spotted off the pitch, cheering on their other halves in designer clothes, flawless make-up and carrying enviable handbags.", + "length": 140 + }, + { + "text": "Speaking after the ordeal, Miss Cooke said: 'As a group we wanted to really test ourselves and raise money for Limitless Skies in the process.", + "length": 142 + }, + { + "text": "Raising money: The WAGs were raising funds for Limitless Skies - a charity which supports orphans and vulnerable children in Tanzania, Africa .", + "length": 143 + }, + { + "text": "Striking a pose: Jayel Williams swings from a rope during the high-endurance course, which also includes 'killing fields' and a hillside slalom .", + "length": 145 + }, + { + "text": "As well as Miss 
Cooke, England star Phil Jones's girlfriend Kaya Hall and young keeper Ben Amos's girlfriend Dani Emery took part in the challenge.", + "length": 147 + }, + { + "text": "The charity, founded by Sam Cooke - girlfriend of Manchester United defender Chris Smalling - supports orphans and vulnerable children in Tanzania, Africa.", + "length": 155 + }, + { + "text": "Testing: The course, designed to replicate trench warfare, features more than 160 obstacles - including running through nettles and clambering up cargo nets .", + "length": 158 + }, + { + "text": "Competitiors are required to haul themselves over huge wooden barricades, plunge into a freezing lake and crawl under an electrified fence dubbed 'The Tiger'.", + "length": 158 + }, + { + "text": "This year's event on January 24 was made even tougher by a rapid melt of snow which took place overnight - something that, luckily, the WAGs didn't have to deal with.", + "length": 166 + }, + { + "text": "'Training and getting fit for the event was great fun - we just wanted to raise as much money as possible for the children and that was what got us around the course.", + "length": 166 + }, + { + "text": "Challenging: Glamour model Sam Cooke - girlfriend of Manchester United's Chris Smalling - receives a kiss from a fellow participant as she tackles the 200-acre course .", + "length": 168 + }, + { + "text": "Warming up: Kaya Hall - girlfriend of Manchester United's Phil Jones - and Chantelle Tagoe - fiance of ex-England striker Emile Heskey - also took part in the challenge .", + "length": 170 + }, + { + "text": "Tough: Dani Emery - girlfriend of Manchester United footballer Ben Amos - battles through mud under barbed wire during the assault course in Perton, South Staffordshire .", + "length": 170 + }, + { + "text": "Twelve WAGs took part in the course - billed the world's 'safest, most dangerous three to four hours of mental and physical endurance' - to raise money for Limitless Skies.", + "length": 172 + }, + { + 
"text": "The 'Tough Guy' challenge, designed to replicate trench warfare, features more than 160 obstacles - including running through nettles, battling fire pits and clambering up 20-metre high cargo nets.", + "length": 197 + }, + { + "text": "Achievement: Twelve WAGs took part in the challenge - billed the world's 'safest, most dangerous three to four hours of mental and physical endurance - including Amy Orange, Miss Williams, Miss Emery, Miss Cooke, Miss Tagoe, Louise Quartey-Papafio, Lisa Horner and Miss Hall .", + "length": 276 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5970908999443054 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:13.148642586Z", + "first_section_created": "2025-12-23T09:33:13.149010802Z", + "last_section_published": "2025-12-23T09:33:13.14921091Z", + "all_results_received": "2025-12-23T09:33:13.21942018Z", + "output_generated": "2025-12-23T09:33:13.219590387Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 70, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:13.149010802Z", + "publish_time": "2025-12-23T09:33:13.14921091Z", + "first_worker_start": "2025-12-23T09:33:13.149860339Z", + "last_worker_end": "2025-12-23T09:33:13.218545Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:13.149846138Z", + "start_time": "2025-12-23T09:33:13.149919641Z", + "end_time": "2025-12-23T09:33:13.149990344Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:13.150043Z", + "start_time": "2025-12-23T09:33:13.15018Z", + "end_time": "2025-12-23T09:33:13.218545Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 68 + }, + "topn": { + "worker_type": "topn", + "receive_time": 
"2025-12-23T09:33:13.149809337Z", + "start_time": "2025-12-23T09:33:13.14989664Z", + "end_time": "2025-12-23T09:33:13.150006545Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:13.149798536Z", + "start_time": "2025-12-23T09:33:13.149860339Z", + "end_time": "2025-12-23T09:33:13.149948543Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 68, + "min_processing_ms": 68, + "max_processing_ms": 68, + "avg_processing_ms": 68, + "median_processing_ms": 68, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3730, + "slowest_section_id": 0, + "slowest_section_time_ms": 69 + } +} diff --git a/data/output/002360f910e8b59949ff31b49af0a7b8126593d3.json b/data/output/002360f910e8b59949ff31b49af0a7b8126593d3.json new file mode 100644 index 0000000..05a9ebf --- /dev/null +++ b/data/output/002360f910e8b59949ff31b49af0a7b8126593d3.json @@ -0,0 +1,440 @@ +{ + "file_name": "002360f910e8b59949ff31b49af0a7b8126593d3.txt", + 
"total_words": 958, + "top_n_words": [ + { + "word": "the", + "count": 35 + }, + { + "word": "a", + "count": 34 + }, + { + "word": "and", + "count": 30 + }, + { + "word": "to", + "count": 27 + }, + { + "word": "was", + "count": 21 + }, + { + "word": "she", + "count": 20 + }, + { + "word": "her", + "count": 19 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "esme", + "count": 16 + }, + { + "word": "of", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "OK.", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Her .", + "length": 5 + }, + { + "text": "Cross.", + "length": 6 + }, + { + "text": "Esme .", + "length": 6 + }, + { + "text": "‘We .", + "length": 7 + }, + { + "text": "daughter.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Hugo Gye .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "September 12, 3.", + "length": 16 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "07:18 EST, 24 September 2013 .", + "length": 30 + }, + { + "text": "05:15 EST, 23 September 2013 .", + "length": 30 + }, + { + "text": "Her 19-year-old sister Sian last week .", + "length": 39 + }, + { + "text": "touch so that we know that you are safe.", + "length": 40 + }, + { + "text": "4pm: She boards a train at Aldershot railway station.", + "length": 53 + }, + { + "text": "You are not in any trouble so please just get in touch.", + "length": 55 + }, + { + "text": "Stavanger, said he was 'worried sick' about his teenage .", + "length": 57 + }, + { + "text": "'It's wonderful news that she has turned up,' the man said.", + "length": 59 + }, + { + "text": "She tells friends she is going to Woking and then to a party.", + "length": 61 + }, + { + "text": "father Mark Smith, who works at Nato’s Joint Warfare Centre in .", + "length": 66 + }, + { + "text": "last seen by friends at the end of the school day on 
September 12.", + "length": 66 + }, + { + "text": "is a pupil at All Hallows Catholic School in Farnham, where she was .", + "length": 69 + }, + { + "text": "reading this, I just want to say please come home or at least get in .", + "length": 70 + }, + { + "text": "15pm: Esme Smith leaves All Hallows Catholic School in Farnham, Surrey.", + "length": 71 + }, + { + "text": "' Mr Savell said that establishing what happened to Esme was a priority.", + "length": 72 + }, + { + "text": "are all really worried about you and we just want to make sure you are .", + "length": 72 + }, + { + "text": "made an emotional appeal to the missing girl, saying: ‘Esme, if you are .", + "length": 75 + }, + { + "text": "September 20: The teenager's father Mark says he is 'worried sick' about Esme.", + "length": 78 + }, + { + "text": "' Discovery: Esme was found following extensive media coverage of her disappearance .", + "length": 85 + }, + { + "text": "Her worried family issued an appeal for information, saying they feared she might be in danger.", + "length": 95 + }, + { + "text": "'I have to admit we were all worried, so this is a massive relief to finally have some good news.", + "length": 97 + }, + { + "text": "The police spokesman said: 'A call was made to Surrey Police by the Metropolitan Police around 9.", + "length": 97 + }, + { + "text": "Found: Esme Smith, 14, has been discovered in London after going missing from her home in Surrey .", + "length": 98 + }, + { + "text": "CCTV: Surveillance footage showed the 14-year-old getting off a train at Waterloo on September 12 .", + "length": 99 + }, + { + "text": "September 23: The 14-year-old is found safe and well in North London and taken to a local police station.", + "length": 105 + }, + { + "text": "On camera: The teenager had changed out of her school uniform by the time she arrived in London, pictured .", + "length": 107 + }, + { + "text": "40am today to say that the 14-year-old, who went missing on September 12, was at a north 
London police station.", + "length": 111 + }, + { + "text": "Detective Superintendent Jon Savell thanked the public and the media for their assistance in helping to find Esme.", + "length": 114 + }, + { + "text": "This has been a very distressing time for Esme and her family and we would ask that that their privacy is respected.", + "length": 116 + }, + { + "text": "She was also wearing a blue denim jacket and was carrying a black school bag and black floral bag with chain straps.", + "length": 116 + }, + { + "text": "CCTV images of the teenager at Waterloo show she had changed out of her school uniform and into a white flowery dress.", + "length": 118 + }, + { + "text": "September 19: Her family launches a public appeal for information, as her sister Sian urges her to get in touch with them.", + "length": 122 + }, + { + "text": "Instead she went to Aldershot railway station and caught a train to London's Waterloo station, where she was caught on CCTV.", + "length": 124 + }, + { + "text": "'Her safe return is thought to be as a direct result of the media appeals, and the media and public are thanked for their assistance.", + "length": 133 + }, + { + "text": "’ More than 4,000 people had 'liked' a Facebook page called Find Esme Smith, dedicated to raising awareness of the girl's disappearance.", + "length": 138 + }, + { + "text": "Missing teenager Esme Smith has been found safe and well after a member of the public recognised her from media reports about her disappearance.", + "length": 144 + }, + { + "text": "The daughter of a Nato commander was yesterday found in North London and has now been returned to Surrey, according to a Surrey Police spokesman.", + "length": 145 + }, + { + "text": "' There was no answer at the Smiths' family home today, but a neighbour revealed that locals were delighted by the news that Esme had been found.", + "length": 145 + }, + { + "text": "Esme was last seen 11 days ago when she left school with friends, telling them that she was 
travelling to nearby Woking and would then go on to a party.", + "length": 152 + }, + { + "text": "Officials in Surrey were due today to interview her about her whereabouts for the past 11 days and determine whether or not she has been the victim of a crime.", + "length": 159 + }, + { + "text": "The 14-year-old from Farnham, Surrey was spotted in North London by a passer-by who flagged down a nearby patrol car and alerted officers, according to Surrey Police.", + "length": 166 + }, + { + "text": "It is unclear whether or not she was with anyone else when she was seen by a passer-by, although she was alone when she was escorted to a North London police station yesterday.", + "length": 176 + }, + { + "text": "' A tall, black man driving a BMW, who was believed to be a social worker, turned up at the family home yesterday afternoon and left half an hour later carrying a small suitcase.", + "length": 178 + }, + { + "text": "Mr Smith has been decorated for service in Afghanistan, the Gulf War and Northern Ireland and he has been commander of medical planning at the Norwegian base since March last year.", + "length": 180 + }, + { + "text": "'We have seen huge use of social media, such as Twitter and Facebook, throughout our search and this has been really beneficial but we would now ask that these pages are taken down.", + "length": 181 + }, + { + "text": "He said: 'It is unclear what has happened and where she has been, and establishing that is obviously a priority for us, as is ensuring her health and welfare, and continuing to support her family.", + "length": 196 + }, + { + "text": "The head of public protection for Surrey Police said: 'Throughout our search for Esme, we were very fortunate to have the support of the local community, Esme's school, her friends, the public and the media, and it was as a direct result of the media activity that she was found.", + "length": 279 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5407648980617523 + }, + 
"name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:13.649995403Z", + "first_section_created": "2025-12-23T09:33:13.650329018Z", + "last_section_published": "2025-12-23T09:33:13.650779138Z", + "all_results_received": "2025-12-23T09:33:13.732869627Z", + "output_generated": "2025-12-23T09:33:13.733077036Z", + "total_processing_time_ms": 83, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 82, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:13.650329018Z", + "publish_time": "2025-12-23T09:33:13.65060123Z", + "first_worker_start": "2025-12-23T09:33:13.651085451Z", + "last_worker_end": "2025-12-23T09:33:13.731946Z", + "total_journey_time_ms": 81, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:13.651083251Z", + "start_time": "2025-12-23T09:33:13.651169455Z", + "end_time": "2025-12-23T09:33:13.651272159Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:13.651228Z", + "start_time": "2025-12-23T09:33:13.651372Z", + "end_time": "2025-12-23T09:33:13.731946Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 80 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:13.651023448Z", + "start_time": "2025-12-23T09:33:13.651085451Z", + "end_time": "2025-12-23T09:33:13.651198956Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:13.651023048Z", + "start_time": "2025-12-23T09:33:13.651088051Z", + "end_time": "2025-12-23T09:33:13.651157154Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:13.650669733Z", + "publish_time": "2025-12-23T09:33:13.650779138Z", + "first_worker_start": "2025-12-23T09:33:13.651166055Z", + 
"last_worker_end": "2025-12-23T09:33:13.711322Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:13.651154454Z", + "start_time": "2025-12-23T09:33:13.651186556Z", + "end_time": "2025-12-23T09:33:13.651194556Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:13.651658Z", + "start_time": "2025-12-23T09:33:13.651789Z", + "end_time": "2025-12-23T09:33:13.711322Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:13.651134153Z", + "start_time": "2025-12-23T09:33:13.651166055Z", + "end_time": "2025-12-23T09:33:13.651186756Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:13.651224157Z", + "start_time": "2025-12-23T09:33:13.651260459Z", + "end_time": "2025-12-23T09:33:13.651264859Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 139, + "min_processing_ms": 59, + "max_processing_ms": 80, + "avg_processing_ms": 69, + "median_processing_ms": 80, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + 
"total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2654, + "slowest_section_id": 0, + "slowest_section_time_ms": 81 + } +} diff --git a/data/output/00236fafde6a7cc9e13dc406cc520fd74b7b5366.json b/data/output/00236fafde6a7cc9e13dc406cc520fd74b7b5366.json new file mode 100644 index 0000000..578f384 --- /dev/null +++ b/data/output/00236fafde6a7cc9e13dc406cc520fd74b7b5366.json @@ -0,0 +1,428 @@ +{ + "file_name": "00236fafde6a7cc9e13dc406cc520fd74b7b5366.txt", + "total_words": 1264, + "top_n_words": [ + { + "word": "the", + "count": 115 + }, + { + "word": "of", + "count": 43 + }, + { + "word": "and", + "count": 36 + }, + { + "word": "to", + "count": 35 + }, + { + "word": "in", + "count": 28 + }, + { + "word": "a", + "count": 24 + }, + { + "word": "from", + "count": 17 + }, + { + "word": "is", + "count": 16 + }, + { + "word": "on", + "count": 16 + }, + { + "word": "will", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "UK.", + "length": 3 + }, + { + "text": "diverted .", + "length": 10 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Remaining very mild for mid-October.", + "length": 36 + }, + { + "text": "Sunrise over Helmdon, Northamptonshire.", + "length": 39 + }, + { + "text": "Low lying fog near Halse, Northamptonshire.", + "length": 43 + }, + { + "text": "The changeable weather is set to continue into next week .", + "length": 58 + }, + { + "text": "On Saturday, rain will give way to sunshine and heavy showers.", + "length": 62 + }, + { + "text": "Turning increasingly wet and windy in the west during the evening.", + "length": 66 + }, + { + "text": "Sheep emerge from the mist this morning near Brackley, Northampton.", + "length": 67 + }, + { + "text": "But hurricanes emanating from the region are not always 
bad news for Britain.", + "length": 77 + }, + { + "text": "Photographer Aimee Bracken captured to images during an early morning walk today .", + "length": 82 + }, + { + "text": "The southern half of the UK will have a mild day with sunny intervals and scattered showers.", + "length": 92 + }, + { + "text": "The storm will bring heavy downpours to Britain, but temperatures are forecast to rise to above 20C .", + "length": 101 + }, + { + "text": "'The detail remains difficult for this weekend with a cold front meandering and waving about over the UK.", + "length": 105 + }, + { + "text": "Images from the US National Oceanic and Atmospheric Administration (NOAA) show the size of the huge storm .", + "length": 107 + }, + { + "text": "Weather maps from the US have led even American forecasters to warn that Britain is in for a series of storms .", + "length": 111 + }, + { + "text": "'At the moment it could bring a wet and windy spell to the UK next Tuesday or dissipate and track further south.", + "length": 112 + }, + { + "text": "The massive storm is just the latest weather system to move in from the US and play havoc with Britain's climate.", + "length": 113 + }, + { + "text": "So, we can turn off the heating, but not quite T-shirt weather so keep the brollies and waterproof coats to handy too.", + "length": 118 + }, + { + "text": "'On Friday, many areas will be dry and fairly warm with some sunshine, although a few showers are possible in the west.", + "length": 119 + }, + { + "text": "He said: 'A change in the weather pattern from today to a more 'westerly' flow from the Atlantic and a strong jet stream.", + "length": 121 + }, + { + "text": "Images from NOAA show the storm moving across the Atlantic Ocean on Sunday (left) into Monday (right) earlier this week .", + "length": 121 + }, + { + "text": "Gale force winds and high tides coincided yesterday to create huge waves, which battered Seaham Harbour in County Durham .", + "length": 123 + }, + { + "text": "The extra 
tropical remnants will be caught in the jet stream on Monday and move rapidly eastwards towards NW Europe next Tuesday.", + "length": 129 + }, + { + "text": "Warnings of further rain follow flooding in the village of Alconbury, Cambridgeshire yesterday after downpours earlier this week .", + "length": 130 + }, + { + "text": "There will then be a brief let-up on Friday morning, before more rain sweeps in from the West overnight and into Saturday morning.", + "length": 130 + }, + { + "text": "' Met Office maps show the swirl of weather fronts caused by the very low pressure system currently sitting in the Atlantic Ocean .", + "length": 131 + }, + { + "text": "A photo of the sunrise over the North Sea shows the calm this morning before heavy rain is set to roll across the UK later this week .", + "length": 134 + }, + { + "text": "Satellite images show the storm stretching from the east coast of the US right the way across the Atlantic as it swirls towards the UK .", + "length": 136 + }, + { + "text": "The newspaper reported that the conditions are 'likely to bring stormy conditions to the northern British Isles Friday into the weekend'.", + "length": 137 + }, + { + "text": "The first bands of rain caused by the huge storm have already led to flooding in some parts, including this in Perry Barr, West Midlands .", + "length": 138 + }, + { + "text": "While these sheep may be in for a further lashing of rain later this week, Britain will enjoy temperatures of up to 20C over the weekend .", + "length": 138 + }, + { + "text": "A huge weather system covering the entire width of the Atlantic is set to hit Britain with repeated bands of rain over the next five days .", + "length": 139 + }, + { + "text": "Around 65mm of rain is expected to fall on parts of Cornwall over the next five days - with more than 60mm predicted in Cumbria and South Wales.", + "length": 144 + }, + { + "text": "Britain is set for a soaking over the next five days caused by a massive weather system which 
stretches across the entire width of the Atlantic.", + "length": 144 + }, + { + "text": "'The showers are expected to be heaviest and most frequent in south Wales and south-west England during the afternoon when they may turn thundery.", + "length": 146 + }, + { + "text": "After heavy rain across the whole country today, showers will hit the South West and Wales tomorrow, with thunder storms predicted in some areas.", + "length": 146 + }, + { + "text": "'The uncertainty for next week is the time and track of the remains of Hurricane Gonzalo, which is a risk for Bermuda as it speeds north on Friday.", + "length": 147 + }, + { + "text": "Leon Brown, a forecaster from the Weather Channel, says the conditions in the Atlantic make it difficult to know how long the wet weather will last.", + "length": 148 + }, + { + "text": "' Commenting on the huge storm, he added: 'The large depression to the west of the UK is beginning to pump up warmth, but also moisture towards the UK.", + "length": 151 + }, + { + "text": "After a huge band of rain sweeps across the country today, intense showers and storms will move in tomorrow followed by heavy downpours over the weekend.", + "length": 153 + }, + { + "text": "'Tomorrow, lingering rain across northern parts of the UK will tend to die out through the day with the best of the sunshine reserved for north-west Scotland.", + "length": 158 + }, + { + "text": "The Environment Agency currently has flood warnings in place in Essex and parts of East Anglia and is warning parts of the South East and Midlands to be on alert.", + "length": 162 + }, + { + "text": "The Washington Post reported on Tuesday that the system's 'central pressure dropped 46 mb in 24 hours (from 1002 mb to 956 mb) – a textbook meteorological bomb'.", + "length": 164 + }, + { + "text": "That hurricane formed in the warm waters of the Caribbean first lashed the Dominican Republic, the Bahamas and the Turks and Caicos Islands with winds of up to 90mph.", + "length": 166 + 
}, + { + "text": "Hurricane Cristobal, which ravaged parts of the Caribbean, brought warm weather to the UK later in August this year by dragging an area of high pressure up from Europe.", + "length": 168 + }, + { + "text": "In August this year, five flood warnings and 33 flood alerts were brought in when the remnants of Hurricane Bertha moved up from the Caribbean and swept across Britain.", + "length": 168 + }, + { + "text": "' The huge storm is just one of three huge depressions filing the Atlantic Ocean at the moment, with Hurricane Fay and Hurricane Gonzalo currently barrelling towards Bermuda.", + "length": 174 + }, + { + "text": "Despite the wet weather on the way, parts of Britain enjoyed a beautiful sunrise this morning, as this picture of mist over a lake in Astwell Spinney, Northamptonshire, show .", + "length": 175 + }, + { + "text": "The massive low pressure system which is bearing down on the UK is causing havoc on both sides of the Atlantic, with hurricanes barrelling into the Caribbean and southern US .", + "length": 175 + }, + { + "text": "This will bring more wet weather, but also some very mild temperatures for a time as southerly winds strengthen and draw warmth northwards from Iberia to western Europe, and the S.", + "length": 180 + }, + { + "text": "But while heavy rain will hit most areas over the next few days, the storm will also send temperatures soaring to around 20C at the weekend - well above average for this time of year.", + "length": 183 + }, + { + "text": "The wet - but unseasonably warm - weather is being caused by a massive low pressure which is sitting to the west of Britain, causing hurricanes in the US and unsettled conditions in the UK.", + "length": 189 + }, + { + "text": "Bertha then travelled north, veering away from the US and Canadian coast before heading across the Atlantic and being downgraded to an 'Atlantic storm', which dropped 20mm of rain an hour on some parts of the UK.", + "length": 212 + }, + { + "text": "A 
forecaster for Meteogroup said: 'Today, a band of rain stretching from Northern Ireland, through northern and eastern parts of England will continue to drift northwards overnight, reaching all but northern Scotland by dawn.", + "length": 225 + }, + { + "text": "Met Office forecast maps predict showers will dump up to 16mm of rain an hour on parts of the South West late this evening, before another band of downpours of similar intensity moves into Wales and along the south coast tomorrow.", + "length": 230 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6294819861650467 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:14.151676936Z", + "first_section_created": "2025-12-23T09:33:14.152065653Z", + "last_section_published": "2025-12-23T09:33:14.152395967Z", + "all_results_received": "2025-12-23T09:33:14.239351069Z", + "output_generated": "2025-12-23T09:33:14.239598279Z", + "total_processing_time_ms": 87, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 86, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:14.152065653Z", + "publish_time": "2025-12-23T09:33:14.152282862Z", + "first_worker_start": "2025-12-23T09:33:14.152707081Z", + "last_worker_end": "2025-12-23T09:33:14.223949Z", + "total_journey_time_ms": 71, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:14.152765283Z", + "start_time": "2025-12-23T09:33:14.152852187Z", + "end_time": "2025-12-23T09:33:14.152961292Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:14.153043Z", + "start_time": "2025-12-23T09:33:14.153198Z", + "end_time": "2025-12-23T09:33:14.223949Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 70 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:14.152730582Z", + 
"start_time": "2025-12-23T09:33:14.152789884Z", + "end_time": "2025-12-23T09:33:14.152881488Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:14.152632578Z", + "start_time": "2025-12-23T09:33:14.152707081Z", + "end_time": "2025-12-23T09:33:14.152743182Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:14.152316164Z", + "publish_time": "2025-12-23T09:33:14.152395967Z", + "first_worker_start": "2025-12-23T09:33:14.152842887Z", + "last_worker_end": "2025-12-23T09:33:14.238467Z", + "total_journey_time_ms": 86, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:14.152805685Z", + "start_time": "2025-12-23T09:33:14.152852087Z", + "end_time": "2025-12-23T09:33:14.15290659Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:14.153123Z", + "start_time": "2025-12-23T09:33:14.153263Z", + "end_time": "2025-12-23T09:33:14.238467Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 85 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:14.152795285Z", + "start_time": "2025-12-23T09:33:14.152878188Z", + "end_time": "2025-12-23T09:33:14.152931591Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:14.152804485Z", + "start_time": "2025-12-23T09:33:14.152842887Z", + "end_time": "2025-12-23T09:33:14.152882088Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 155, + "min_processing_ms": 70, + "max_processing_ms": 85, + "avg_processing_ms": 77, + "median_processing_ms": 85, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3659, + "slowest_section_id": 1, + "slowest_section_time_ms": 86 + } +} diff --git a/data/output/00237a1d4477c976ee17d82d438e2650e0bff9a9.json b/data/output/00237a1d4477c976ee17d82d438e2650e0bff9a9.json new file mode 100644 index 0000000..7ea931a --- /dev/null +++ b/data/output/00237a1d4477c976ee17d82d438e2650e0bff9a9.json @@ -0,0 +1,388 @@ +{ + "file_name": "00237a1d4477c976ee17d82d438e2650e0bff9a9.txt", + "total_words": 1108, + "top_n_words": [ + { + "word": "the", + "count": 64 + }, + { + "word": "to", + "count": 35 + }, + { + "word": "a", + "count": 33 + }, + { + "word": "of", + "count": 32 + }, + { + "word": "and", + "count": 25 + }, + { + "word": "in", + "count": 21 + }, + { + "word": "s", + "count": 21 + }, + { + "word": "de", + "count": 15 + }, + { + "word": "middel", + "count": 15 + }, + { + "word": "that", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "\"We're going to Mars!", + "length": 21 + }, + { + "text": "\"Otherwise, I would have ruined the game.", + "length": 41 + }, + { + "text": "Read this: Artist's spectacular glasses .", + "length": 41 + }, + { + "text": "Read this: Zambia's 
amazing street acrobats .", + "length": 45 + }, + { + "text": "Read this: Nigerian doctor takes to the skies .", + "length": 47 + }, + { + "text": "This led her to go about the story with caution.", + "length": 48 + }, + { + "text": "\"That is something common to all humanity,\" she adds.", + "length": 53 + }, + { + "text": "\"But instead of producing a moving image, I just did stills.", + "length": 60 + }, + { + "text": "Others were repurposed pictures from the photographer's archive.", + "length": 64 + }, + { + "text": "\"I would love to [take the exhibition to Lusaka as well],\" she says.", + "length": 68 + }, + { + "text": "\"He had a fascination for the universe that we all share,\" says De Middel.", + "length": 74 + }, + { + "text": "\"Asking if we're alone, looking at the stars, making metaphysical questions.", + "length": 76 + }, + { + "text": "\"It was like a short, small and very modest movie production,\" says De Middel.", + "length": 78 + }, + { + "text": "So far, she says, her work has received a great response from people in Africa.", + "length": 79 + }, + { + "text": "\" Today, nobody seems to know what happened to Nkoloso or his cast of wannabe space explorers.", + "length": 94 + }, + { + "text": "approaching a subject that can be sensitive or can be offensive for some people,\" says De Middel.", + "length": 97 + }, + { + "text": "De Middel, who's never been to Zambia, acknowledges she's not \"an expert in Africa\" -- nor in space.", + "length": 100 + }, + { + "text": "\"I think that's the greatest characteristic we have as humans, that we can dream of becoming big,\" says De Middel.", + "length": 114 + }, + { + "text": "Whilst scouring the depths of the internet, she stumbled on a website listing the 10 craziest experiments in history.", + "length": 117 + }, + { + "text": "In the self-published book, De Middel self-consciously conjures up the story of the unofficial space program piece by piece.", + "length": 124 + }, + { + "text": "From within 
what he called the \"Academy of Sciences and Space Technology,\" Nkoloso said, he'd been studying Mars through telescopes.", + "length": 132 + }, + { + "text": "That is a universal feeling and it doesn't belong to the people who can actually have the technology to go to the moon; it's everywhere.", + "length": 136 + }, + { + "text": "\" Most of the images were shot in between different projects, in locations such as Spain, the Palestinian territories, Italy and Romania.", + "length": 137 + }, + { + "text": "The result is \"The Afronauts,\" an arresting photo book that has been shortlisted for this year's esteemed Deutsche Börse Photography Prize.", + "length": 140 + }, + { + "text": "\" \"Big dreams\" Whilst playful, De Middel's dream-like images are not intended to make fun of Nkoloso's fantastical, yet high-flying, ambitions.", + "length": 143 + }, + { + "text": "Throughout, facts and fiction are intertwined as part of an intriguing narrative which challenges viewers' perceptions about what's real and what's not.", + "length": 152 + }, + { + "text": "\"I was working in a very free way,\" says De Middel, sitting at the café of the Photographer's Gallery in London, where The Afronauts is being exhibited.", + "length": 153 + }, + { + "text": "She's been contacted by Nigeria's space program and been invited to the continent to give talks, while her book is being shown in South Africa and Senegal.", + "length": 155 + }, + { + "text": "If anything, De Middel says, the extraordinary tale of the forgotten Zambian space program presented a chance to talk about Africa from a different perspective.", + "length": 160 + }, + { + "text": "Fascinated by Nkoloso's visionary and dreamy perspective on life, De Middel set about creating an imaginary documentation of his elusive endeavors some 50 years ago.", + "length": 165 + }, + { + "text": "\"Not only because the story is positive, in terms of African people having dreams, but also evidencing what we expect from Africa in terms 
of aesthetics and behavior.", + "length": 166 + }, + { + "text": "Yet Nkoloso's desire to dream the impossible has found a new, alternative, home inside De Middel's images, striking a chord with captivated audiences around the world.", + "length": 167 + }, + { + "text": "\"You don't have to be American and work for NASA to dream of going to the moon; you can be an African -- he [Nkoloso] was a school teacher and thought that could be done.", + "length": 170 + }, + { + "text": "\"The Afronauts\" Fast forward to 2010, when Spanish photographer Cristina De Middel was searching for \"unbelievable stories\" for a new personal project she was hoping to develop.", + "length": 177 + }, + { + "text": "\" \"Honest approach\" Creating The Afronauts, which was sold out in just a few months, De Middel worked more as a movie director, trying to make the best of the resources around her.", + "length": 180 + }, + { + "text": "Her speculative pictures exude a feeling of nostalgia and sympathy, celebrating the audacious and naive spirit of a past era where grandiose dreams were not limited by circumstances.", + "length": 182 + }, + { + "text": "(CNN) -- Half a century ago, with the space race in full swing, the heated quest for interplanetary exploration between the Earth's superpowers gained a new, self-proclaimed, contender.", + "length": 185 + }, + { + "text": "\"The first one on the list was the Zambia space program,\" says De Middel who, after a decade of working as a news photojournalist, had decided to embark on a new career as a visual storyteller.", + "length": 193 + }, + { + "text": "He'd also been training his would-be astronauts by rolling them down a hill in oil drums, a technique aimed at getting his team acclimatized to the weightlessness experienced during space travel.", + "length": 195 + }, + { + "text": "For models, she relied on social media and friends; for the astronauts' helmets, she used old domes of street lights; and for the flashy spacesuits, she employed 
the sewing talents of her grandmother.", + "length": 200 + }, + { + "text": "\"Specially trained spacegirl Matha Mwambwa, two cats (also specially trained) and a missionary will be launched in our first rocket,\" wrote Nkoloso, a grade-school science teacher and self-appointed director of the space academy.", + "length": 229 + }, + { + "text": "She uses a series of cinematographic images, including staged depictions of discarded oil barrels, makeshift spaceships, elephant-hugging spacemen and flying cats, as well as vintage-looking maps, documents and newspapers cuttings.", + "length": 231 + }, + { + "text": "\"I needed to add mystery; I needed to add this fascination for great things and work on the photographic language that would not state if it's true,\" adds De Middel, encouraging viewers to question the documentary value of photography.", + "length": 235 + }, + { + "text": "\"The only honest approach I could do to that story was documenting my cliché, and that's what I really wanted to do, because, in a way, I was raising awareness of the existence of that cliché and what we expect from Africa,\" she says.", + "length": 236 + }, + { + "text": "\"Our rocket crew is ready,\" continued Nkoloso, explaining that his aspiring troupe of space explorers had been gearing up for their interstellar journey in the headquarters of the academy he'd set up on the outskirts of Zambian capital Lusaka.", + "length": 243 + }, + { + "text": "\" audaciously declared Zambian schoolteacher Edward Makuka Nkoloso in a 1964 newspaper op-ed, revealing to the world his fanciful plans for his country to beat the United States and the Soviet Union in their fierce battle to conquer outer space.", + "length": 245 + }, + { + "text": "Unsurprisingly, the program, which was never taken seriously by the government of the newly independent Zambia, failed to take off; a $7 million grant Nkoloso said he'd requested from UNESCO never came, whilst the pregnancy of the 17-year-old spacegirl brought 
the proceedings to an end.", + "length": 287 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.39870473742485046 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:14.65315053Z", + "first_section_created": "2025-12-23T09:33:14.654974405Z", + "last_section_published": "2025-12-23T09:33:14.655386122Z", + "all_results_received": "2025-12-23T09:33:14.738006726Z", + "output_generated": "2025-12-23T09:33:14.738228235Z", + "total_processing_time_ms": 85, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 82, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:14.654974405Z", + "publish_time": "2025-12-23T09:33:14.655234815Z", + "first_worker_start": "2025-12-23T09:33:14.655689034Z", + "last_worker_end": "2025-12-23T09:33:14.73716Z", + "total_journey_time_ms": 82, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:14.655777038Z", + "start_time": "2025-12-23T09:33:14.655855041Z", + "end_time": "2025-12-23T09:33:14.655949645Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:14.656089Z", + "start_time": "2025-12-23T09:33:14.656252Z", + "end_time": "2025-12-23T09:33:14.73716Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 80 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:14.655755037Z", + "start_time": "2025-12-23T09:33:14.65582654Z", + "end_time": "2025-12-23T09:33:14.655923544Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:14.655608231Z", + "start_time": "2025-12-23T09:33:14.655689034Z", + "end_time": "2025-12-23T09:33:14.655724736Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": 
"2025-12-23T09:33:14.655304218Z", + "publish_time": "2025-12-23T09:33:14.655386122Z", + "first_worker_start": "2025-12-23T09:33:14.655853641Z", + "last_worker_end": "2025-12-23T09:33:14.729945Z", + "total_journey_time_ms": 74, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:14.655810639Z", + "start_time": "2025-12-23T09:33:14.655853641Z", + "end_time": "2025-12-23T09:33:14.655882742Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:14.656101Z", + "start_time": "2025-12-23T09:33:14.656249Z", + "end_time": "2025-12-23T09:33:14.729945Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 73 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:14.65583334Z", + "start_time": "2025-12-23T09:33:14.655869242Z", + "end_time": "2025-12-23T09:33:14.655909143Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:14.655877042Z", + "start_time": "2025-12-23T09:33:14.655932644Z", + "end_time": "2025-12-23T09:33:14.655949845Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 153, + "min_processing_ms": 73, + "max_processing_ms": 80, + "avg_processing_ms": 76, + "median_processing_ms": 80, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3279, + "slowest_section_id": 0, + "slowest_section_time_ms": 82 + } +} diff --git a/data/output/00238396d0ef997862d8143babc0847927a1699e.json b/data/output/00238396d0ef997862d8143babc0847927a1699e.json new file mode 100644 index 0000000..4bb9081 --- /dev/null +++ b/data/output/00238396d0ef997862d8143babc0847927a1699e.json @@ -0,0 +1,254 @@ +{ + "file_name": "00238396d0ef997862d8143babc0847927a1699e.txt", + "total_words": 493, + "top_n_words": [ + { + "word": "the", + "count": 21 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "medel", + "count": 9 + }, + { + "word": "s", + "count": 9 + }, + { + "word": "that", + "count": 9 + }, + { + "word": "at", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "25m, an image-rights fee of £1.", + "length": 32 + }, + { + "text": "I’m not in the habit of selling anything at a loss.", + "length": 53 + }, + { + "text": "‘People may say that Gary is too good for the Championship.", + "length": 61 + }, + { + "text": "Cardiff chairman Mehmet Dalman wants at least £10million for Medel .", + "length": 69 + }, + { + "text": "I’ve had no indication whatsoever from Gary that he may want to go.", + "length": 69 + }, + { + "text": "There was also an agent’s fee of £800,000, a signing-on fee of £1.", + "length": 70 + }, + { + "text": "In demand: Medel (right) is being courted by a number of sides in La Liga .", + "length": 75 + }, + { + "text": "Cardiff's record signing Gary 
Medel celebrates Chile's first goal against Spain .", + "length": 81 + }, + { + "text": "6m for a player who rarely gives interviews, and a weekly basic wage of £35,000.", + "length": 81 + }, + { + "text": "But then we’ve got lots of players who are good enough to play in the Premier League.", + "length": 87 + }, + { + "text": "‘If so, and Gary comes to me and says he wishes to leave, I will engage in those discussions then.", + "length": 100 + }, + { + "text": "Cardiff boss Solskjaer has said that Medel should be playing at a higher level than the Championship .", + "length": 102 + }, + { + "text": "‘We’ve made six summer signings already and have set out our stall to try to regain instant promotion.", + "length": 106 + }, + { + "text": "‘But even if that were to be the case, I think it’s fair to say I’m not known as a pushover in negotiations.", + "length": 114 + }, + { + "text": "‘I’ve no doubts that after what Gary has been doing in this World Cup, there will be interested parties out there.", + "length": 118 + }, + { + "text": "He said: ‘After what he’s been doing out in Brazil, there may well be a few offers on the table at the end of the tournament.", + "length": 129 + }, + { + "text": "Medel is expected to leave the relegated club after the World Cup, with Valencia leading a group of La Liga sides interested in the midfielder.", + "length": 143 + }, + { + "text": "Cardiff City chairman Mehmet Dalman has warned clubs interested in Gary Medel that they will not sell their £10million record signing at a loss.", + "length": 145 + }, + { + "text": "Dalman added: ‘I hope that once this tournament is over, he will choose to stick around and help us try to regain our place in the Premier League.", + "length": 148 + }, + { + "text": "’ Sportsmail revealed in December the sizeable outlay of Medel’s signing, with his initial £10m fee possibly rising to £15m dependant on clauses.", + "length": 151 + }, + { + "text": "But Dalman insists no offers have been received and 
strongly indicated that Medel will not be allowed to leave for less than the record fee Cardiff paid.", + "length": 153 + }, + { + "text": "‘But he is under contract to Cardiff City and sometimes it’s not necessarily about the league you play in, it’s about the club you play for and their ambition.", + "length": 165 + }, + { + "text": "The Chilean is understood to have a release clause in his contract which approximately matches the fee Cardiff paid Sevilla last summer - a fee that started at £10million, but featured significant additional payments.", + "length": 218 + }, + { + "text": "Ole Gunnar Solskjaer has previously said Medel ‘deserves to be playing at a higher level than the Championship’ and the 26-year-old’s impressive performances at the World Cup have supported those claims after a mediocre season in Wales.", + "length": 242 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.48097482323646545 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:15.156160456Z", + "first_section_created": "2025-12-23T09:33:15.157678719Z", + "last_section_published": "2025-12-23T09:33:15.157848726Z", + "all_results_received": "2025-12-23T09:33:15.215356295Z", + "output_generated": "2025-12-23T09:33:15.215531203Z", + "total_processing_time_ms": 59, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 57, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:15.157678719Z", + "publish_time": "2025-12-23T09:33:15.157848726Z", + "first_worker_start": "2025-12-23T09:33:15.158314545Z", + "last_worker_end": "2025-12-23T09:33:15.21456Z", + "total_journey_time_ms": 56, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:15.158313145Z", + "start_time": "2025-12-23T09:33:15.158375248Z", + "end_time": "2025-12-23T09:33:15.158455851Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + 
"sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:15.158502Z", + "start_time": "2025-12-23T09:33:15.158664Z", + "end_time": "2025-12-23T09:33:15.21456Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 55 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:15.158248442Z", + "start_time": "2025-12-23T09:33:15.158314545Z", + "end_time": "2025-12-23T09:33:15.158386848Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:15.158295344Z", + "start_time": "2025-12-23T09:33:15.158350746Z", + "end_time": "2025-12-23T09:33:15.158407049Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 55, + "min_processing_ms": 55, + "max_processing_ms": 55, + "avg_processing_ms": 55, + "median_processing_ms": 55, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2735, + "slowest_section_id": 0, + "slowest_section_time_ms": 56 + } +} diff --git 
a/data/output/00238f16261b335618b065b5a4e5433de3c14615.json b/data/output/00238f16261b335618b065b5a4e5433de3c14615.json new file mode 100644 index 0000000..5154ff1 --- /dev/null +++ b/data/output/00238f16261b335618b065b5a4e5433de3c14615.json @@ -0,0 +1,404 @@ +{ + "file_name": "00238f16261b335618b065b5a4e5433de3c14615.txt", + "total_words": 1229, + "top_n_words": [ + { + "word": "the", + "count": 98 + }, + { + "word": "in", + "count": 38 + }, + { + "word": "of", + "count": 33 + }, + { + "word": "his", + "count": 31 + }, + { + "word": "a", + "count": 29 + }, + { + "word": "cumberbatch", + "count": 25 + }, + { + "word": "to", + "count": 25 + }, + { + "word": "plantation", + "count": 19 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "family", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "6m.", + "length": 3 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "But he had no son to inherit.", + "length": 29 + }, + { + "text": "But the writing was on the wall.", + "length": 32 + }, + { + "text": "Pictured is the sitting room at Cleland .", + "length": 41 + }, + { + "text": "Pictured is an old map of the plantation .", + "length": 42 + }, + { + "text": "Pictured above is a worker on the plantation today .", + "length": 52 + }, + { + "text": "By the time he died in 1750, Abraham had built up a small fortune.", + "length": 66 + }, + { + "text": "History: A picture of Steven, left, as a young boy with his family .", + "length": 68 + }, + { + "text": "In today's money, the compensation would be the equivalent of more than £3.", + "length": 76 + }, + { + "text": "The nearby St Nicholas Abbey plantation was also own by the Cumberbatch family .", + "length": 80 + }, + { + "text": "Faith: The inside of the church where Benedict's family once worshiped in Barbados .", + "length": 84 + }, + { + "text": "Working: The plantation is still going today, although underneath the ownership of a new family .", + "length": 97 + }, + { + 
"text": "Owner: Steven Tempro stands on the steps where Cumberbatch's ancestors once watched over their land .", + "length": 102 + }, + { + "text": "Workers rights: Today, the people who work on the plantation are all paid a fair wage for a day's work .", + "length": 104 + }, + { + "text": "His mother Wanda warned him not to use his surname, for fear of reprisals fro the descendants of slaves .", + "length": 105 + }, + { + "text": "Empire: Cumberbatch's direct ancestors were not the only members of the family to own property on the island.", + "length": 109 + }, + { + "text": "At the time, he half-joked he took the role of Willaim Pitt the Younger 'as a sort of apology' for his ancestry.", + "length": 112 + }, + { + "text": "Modern: The plantation was bought by 66-year-old Steven Tempro in 1985, and has been updated for modern living .", + "length": 112 + }, + { + "text": "Pastor: Cumberbatch's character in 12 Years A Slave was a pastor - yet did not disagree with owning other human beings .", + "length": 120 + }, + { + "text": "Diversifying: Cumberbatch did not just own the farms however, he also built refineries, which helped to add ot his fortune.", + "length": 123 + }, + { + "text": "It was a role which, surely, wasn't that far removed from his own ancestors, who moved to Barbados in the late 17th century.", + "length": 124 + }, + { + "text": "But it would be his role in the Oscar-winning 12 Years a Slave which would truly thrust the Cumberbatch history into the spotlight.", + "length": 131 + }, + { + "text": "The family lived in comfort, in the white walled house with shuttered windows and large rooms where they could shelter from the heat.", + "length": 133 + }, + { + "text": "The Cumberbatch family are aware of how their dark past at Cleland Plantation, in Saint Andrew, Barbados, is viewed by the wider world.", + "length": 135 + }, + { + "text": "The plantations passed down the generations until 1816, when Abraham's grandson, Abraham Parry, faced a rebellion 
from one of his slaves.", + "length": 137 + }, + { + "text": "But Cumberbatch, 38, has always been open about his history and, if anything, appears to have sought out roles which allow him no room to hide.", + "length": 143 + }, + { + "text": "Portrait: Yet more than 150 years after the Cumberbatch family left the plantation, a painting of Abraham Cumberbatch still stands in the hallway .", + "length": 147 + }, + { + "text": "Now, MailOnline can show pictures of inside the plantation where hundreds of men and women toiled as slaves while his ancestors reaped the rewards.", + "length": 147 + }, + { + "text": "The Oscar-nominee, who is up for the best actor gong on February 22, first spoke of it when he appeared in the abolition film Amazing Grace in 2006.", + "length": 148 + }, + { + "text": "The move was the idea of his eighth-great-grandfather, Joshua, who brought his wife Ann and three children half way across the world for a new life.", + "length": 148 + }, + { + "text": "Wealth: The Cumberbatch family owned Cleland Plantation, in Saint Andrew, Barbados, (pictured) for 100 years - during which time as many as 250 slaves .", + "length": 152 + }, + { + "text": "The uprising lasted two months until Bussa and an estimated 1,000 slaves were killed – another 214 slaves were executed and 123 shipped off the island.", + "length": 153 + }, + { + "text": "Art imitating life: Cumberbatch played William Ford in the 2013 film, a plantation owner who was the first man to buy Solomon Northup, played by Ejiofor .", + "length": 154 + }, + { + "text": "But Joshua died shortly after arrival and would never see the plantations his descendants would go onto own after his son Abraham built up the family fortune.", + "length": 158 + }, + { + "text": "Compensation: When the slave trade came to an end in 1838, Abraham Parry, Cumberbatch's great-great-great grandfather was given £6,000 for the loss of his human property.", + "length": 171 + }, + { + "text": "'Sort of apology': Benedict 
Cumberbatch as William Pitt the Younger, in 2006's Amazing Grace - a role he took as a 'sort of apology' for his family's part in the slave trade .", + "length": 175 + }, + { + "text": "His mother, the actress Wanda Ventham, had urged him not to use his real surname professionally, in case it made him a target for reparation claims by the descendants of slaves.", + "length": 177 + }, + { + "text": "learned the basics of the sugar industry as a salaried plantation foreman, before buying up plantations in the early 18th century and building refineries to turn raw cane into sugar.", + "length": 182 + }, + { + "text": "Comfortable: The Cumberbatch family were able to build up their wealth by using slaves to work on the sugar plantation - using it to create a comfortable lifestyle in the Caribbean country.", + "length": 189 + }, + { + "text": "His regret will no doubt have been intensified by the fact, just less than two years ago, the painful matter of how his family made its fortune came under scrutiny when he starred in 12 Years A Slave.", + "length": 200 + }, + { + "text": "The film, based on the memoir of Solomon Northup, a 19th century black musician who was kidnapped and sold into the slave trade, saw Cumberbatch playing William Ford, the man who 'owned' Northup first.", + "length": 201 + }, + { + "text": "Modern day: Cumberbatch with his parents Wanda Ventham (left) and Timothy Carlton (right, with actress Tracie Bennett in 2012), who are both professional actors who even appeared alongside him in Sherlock.", + "length": 205 + }, + { + "text": "It is hard to imagine the mortification Benedict Cumberbatch felt after he used the word 'coloured' instead of 'black' when trying to make a point about the lack of roles available for ethnic minorities in the UK.", + "length": 213 + }, + { + "text": "Slave trade: Abraham Cumberbatch - seen here in a portrait hanging in St Nicholas Abbey Planation - was Benedict Cumberbatch's seventh great-grandfather, and the first of 
his family to run a plantation using slaves .", + "length": 216 + }, + { + "text": "Property: The people enslaved on the sugar plantation were seen as nothing more than property by Cumberbatch's ancestors: They were included in the overall value of the property, alongside the land, in the family's accounts .", + "length": 225 + }, + { + "text": "Rebellion: The family - whose members are thought to be buried at St Peter's Parish Church, in Barbados - left the country in 1820, four years after the slaves revolted in 1816, led by a man called Bussa - who worked on their plantation .", + "length": 238 + }, + { + "text": "His will showed his priorities: he left his estate to his grandson Abraham (his daughter's son) on the condition he changed his surname to Cumberbatch and stipulated that his plantation 'be kept staffed with 250 negro slaves and 150 head of cattle'.", + "length": 249 + }, + { + "text": "Cumberbatch's ancestors were slave owners, boasting '250 negroes' on their sugar plantation in Barbados in the 18th and 19th centuries, and were given - on the abolition of the slave trade in the 1830s -  £6,000 to compensate for the loss of their 'human property'.", + "length": 267 + }, + { + "text": "Will: Abraham had built up a significant fortune, but had no sons - so he left the plantation to his grandson, on the understanding he would change his surname to Cumberbatch, while also stipulating it would also 'be kept staffed with 250 negro slaves' and 150 cattle .", + "length": 269 + }, + { + "text": "In 1820 Abraham Parry returned to Britain to live at Fairwater House, a stately home, in Devon, which is now the site of Taunton School, and 18 years later the slave trade in Barbados was abolished – Cumberbatch great-great-great grandfather received more than £6,000 for the loss of their 'human' property.", + "length": 310 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.639557421207428 + }, + "name_replaced": false, + "performance": { + 
"file_detection_time": "2025-12-23T09:33:15.65861346Z", + "first_section_created": "2025-12-23T09:33:15.660120922Z", + "last_section_published": "2025-12-23T09:33:15.660578541Z", + "all_results_received": "2025-12-23T09:33:15.764080806Z", + "output_generated": "2025-12-23T09:33:15.764337216Z", + "total_processing_time_ms": 105, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 103, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:15.660120922Z", + "publish_time": "2025-12-23T09:33:15.660354032Z", + "first_worker_start": "2025-12-23T09:33:15.66080895Z", + "last_worker_end": "2025-12-23T09:33:15.763068Z", + "total_journey_time_ms": 102, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:15.66081005Z", + "start_time": "2025-12-23T09:33:15.660882153Z", + "end_time": "2025-12-23T09:33:15.660993258Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:15.661159Z", + "start_time": "2025-12-23T09:33:15.661295Z", + "end_time": "2025-12-23T09:33:15.763068Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 101 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:15.660753148Z", + "start_time": "2025-12-23T09:33:15.66080895Z", + "end_time": "2025-12-23T09:33:15.660916555Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:15.660942756Z", + "start_time": "2025-12-23T09:33:15.66103436Z", + "end_time": "2025-12-23T09:33:15.661089962Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:15.660461636Z", + "publish_time": "2025-12-23T09:33:15.660578541Z", + "first_worker_start": "2025-12-23T09:33:15.661114563Z", + "last_worker_end": "2025-12-23T09:33:15.748244Z", + 
"total_journey_time_ms": 87, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:15.661092262Z", + "start_time": "2025-12-23T09:33:15.661130964Z", + "end_time": "2025-12-23T09:33:15.661177066Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:15.661305Z", + "start_time": "2025-12-23T09:33:15.661435Z", + "end_time": "2025-12-23T09:33:15.748244Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 86 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:15.661075561Z", + "start_time": "2025-12-23T09:33:15.661114563Z", + "end_time": "2025-12-23T09:33:15.661180266Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:15.661086662Z", + "start_time": "2025-12-23T09:33:15.661121563Z", + "end_time": "2025-12-23T09:33:15.661150865Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 187, + "min_processing_ms": 86, + "max_processing_ms": 101, + "avg_processing_ms": 93, + "median_processing_ms": 101, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3567, + "slowest_section_id": 0, + "slowest_section_time_ms": 102 + } +} diff --git a/data/output/002413ce02e43d934399ac6f700e068f9bea10d4.json b/data/output/002413ce02e43d934399ac6f700e068f9bea10d4.json new file mode 100644 index 0000000..c955f8b --- /dev/null +++ b/data/output/002413ce02e43d934399ac6f700e068f9bea10d4.json @@ -0,0 +1,370 @@ +{ + "file_name": "002413ce02e43d934399ac6f700e068f9bea10d4.txt", + "total_words": 678, + "top_n_words": [ + { + "word": "the", + "count": 42 + }, + { + "word": "in", + "count": 20 + }, + { + "word": "a", + "count": 18 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "to", + "count": 14 + }, + { + "word": "that", + "count": 11 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "is", + "count": 9 + }, + { + "word": "jews", + "count": 8 + }, + { + "word": "circumcision", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "But .", + "length": 5 + }, + { + "text": "But .", + "length": 5 + }, + { + "text": "Jews .", + "length": 6 + }, + { + "text": "'The .", + "length": 6 + }, + { + "text": "Daniel S.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Hasidic Jews.", + "length": 13 + }, + { + "text": "Circumcision .", + "length": 14 + }, + { + "text": "Rick Dewsbury .", + "length": 15 + }, + { + "text": "The other survived.", + "length": 19 + }, + { + "text": "4 per 100,000 cases.", + "length": 20 + }, + { + "text": "It's only their customs.", + "length": 24 + }, + { + "text": "05:07 EST, 9 July 2012 .", + "length": 24 + }, + { + "text": "06:23 EST, 9 July 2012 .", + "length": 24 + }, + { + "text": "can actually spread diseases.", + 
"length": 29 + }, + { + "text": "religious freedoms,' he added.", + "length": 30 + }, + { + "text": "Infectious diseases specialist Dr.", + "length": 34 + }, + { + "text": "New York City Health Commissioner Dr.", + "length": 37 + }, + { + "text": "ritual has nothing to do with religion.", + "length": 39 + }, + { + "text": "It is practiced widely in Israel and among .", + "length": 44 + }, + { + "text": "carried out on their babies to sign a consent waiver.", + "length": 53 + }, + { + "text": "men must be circumcised eight days after they are born.", + "length": 55 + }, + { + "text": "the baby's penis was initially thought to prevent infection.", + "length": 60 + }, + { + "text": "Devout: Three young ultra orthodox Jews in traditional clothing.", + "length": 64 + }, + { + "text": "Dr Berman also accused New York government chiefs of 'racial bias'.", + "length": 67 + }, + { + "text": "they've managed to convince the city that it's a violation of their .", + "length": 69 + }, + { + "text": "medical advances over the last hundred years have made clear that it .", + "length": 70 + }, + { + "text": "believed that blood was the 'life-giving element' and sucking it from .", + "length": 71 + }, + { + "text": "rituals originate from Scriptures, in which God tells Abraham that all .", + "length": 72 + }, + { + "text": "pushing through regulation forcing anybody wishing to have the procedure .", + "length": 74 + }, + { + "text": "Berman defended the practice in a paper published in the Jewish journal Dialogue.", + "length": 81 + }, + { + "text": "Almost 20,500 baby boys had the procedure carried out in New York in June this year.", + "length": 84 + }, + { + "text": "Some Jews have started using pipettes instead of oral suction because they are more hygienic .", + "length": 94 + }, + { + "text": "He claimed there is no evidence that the 'metzitzah b'peh' procedures caused the infant deaths.", + "length": 95 + }, + { + "text": "The earlier death was in November 2004, when a twin 
caught herpes after undergoing the procedure.", + "length": 97 + }, + { + "text": "The most recent of the deaths was in Brooklyn last September and a criminal investigation is still ongoing.", + "length": 107 + }, + { + "text": "Thomas Farley said in a statement: 'There is no safe way to perform oral suction on any open wound in a newborn.", + "length": 112 + }, + { + "text": "But some Orthodox Jews have complained about the measures claiming that they infringe on their 'religious freedom'.", + "length": 115 + }, + { + "text": "Numbers of cases in New York alone emerged after the city's health department launched an investigation following the deaths.", + "length": 125 + }, + { + "text": "Rabbi Moshe Tendler, professor of Talmudic Law and Bioethics at Yeshiva University, told KTLA that the practice was 'primitive nonsense'.", + "length": 137 + }, + { + "text": "A controversial Jewish circumcision practice in which the blood of a baby's cut penis is sucked by a religious leader has been condemned after the deaths of two infants.", + "length": 169 + }, + { + "text": "'Because blood is the life-giving element, they believe that it's supposed to be part of the whole procedure,' he said, adding that there were 'no known medical benefits'.", + "length": 171 + }, + { + "text": "Controversial: A mohel, left, prepares to carry out a circumcision on a baby boy as prayers are read during a traditional Jewish ceremony (file picture) Heath chiefs in New York are now .", + "length": 187 + }, + { + "text": "According to the findings of the investigation, infants who were circumcise with suction between April 2006 and December 2012 had a risk of catching neonatal herpes (HSV-1) infection of 24.", + "length": 189 + }, + { + "text": "' Jeffrey Mazlin, a certified mohel and physician in New York who regularly practices circumcision procedures, said Orthodox Jews look view the religion as 'more important than individuals'.", + "length": 190 + }, + { + "text": "The 'metzitzah b'peh' 
performed by ultra Orthodox Jews sees the eight-day old baby have a traditional circumcision but the 'mohel' then places his mouth around the wound and sucks up the blood.", + "length": 193 + }, + { + "text": "An alternative to the practitioner removing the blood with their mouth is to use a sterilised glass tube or pipette to create the suction, which some Jews have started incorporating into the ritual.", + "length": 198 + }, + { + "text": "But the practice - intended to prevent infection - has sparked controversy in recent years after the death of two infants and the cntraction of herpes in at least 11 others between November 2000 and December 2011.", + "length": 213 + }, + { + "text": "'Parents considering ritual circumcision need to know that circumcision should only be performed under sterile conditions, like any other procedures that create open cuts, whether by mohelim or medical professionals.", + "length": 216 + }, + { + "text": "The suction practice is carried out among strict followers of the religion (file picture) Homeland: There were almost 20,500 metzitzah b'peh procedures in New York in June this year but the practice is carried out widely in Israel, pictured .", + "length": 242 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5271329283714294 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:16.161421678Z", + "first_section_created": "2025-12-23T09:33:16.161775593Z", + "last_section_published": "2025-12-23T09:33:16.162008003Z", + "all_results_received": "2025-12-23T09:33:16.23304843Z", + "output_generated": "2025-12-23T09:33:16.233275439Z", + "total_processing_time_ms": 71, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 71, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:16.161775593Z", + "publish_time": "2025-12-23T09:33:16.162008003Z", + "first_worker_start": "2025-12-23T09:33:16.162519824Z", + 
"last_worker_end": "2025-12-23T09:33:16.232073Z", + "total_journey_time_ms": 70, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:16.162510623Z", + "start_time": "2025-12-23T09:33:16.162598327Z", + "end_time": "2025-12-23T09:33:16.162689131Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:16.16277Z", + "start_time": "2025-12-23T09:33:16.162905Z", + "end_time": "2025-12-23T09:33:16.232073Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 69 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:16.162451921Z", + "start_time": "2025-12-23T09:33:16.162519824Z", + "end_time": "2025-12-23T09:33:16.162600327Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:16.162454121Z", + "start_time": "2025-12-23T09:33:16.162525224Z", + "end_time": "2025-12-23T09:33:16.162558925Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 69, + "min_processing_ms": 69, + "max_processing_ms": 69, + "avg_processing_ms": 69, + "median_processing_ms": 69, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 
1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4146, + "slowest_section_id": 0, + "slowest_section_time_ms": 70 + } +} diff --git a/data/output/00244f5a773fb257564b410bc8b72bdc94943de8.json b/data/output/00244f5a773fb257564b410bc8b72bdc94943de8.json new file mode 100644 index 0000000..3fdc6f2 --- /dev/null +++ b/data/output/00244f5a773fb257564b410bc8b72bdc94943de8.json @@ -0,0 +1,262 @@ +{ + "file_name": "00244f5a773fb257564b410bc8b72bdc94943de8.txt", + "total_words": 444, + "top_n_words": [ + { + "word": "the", + "count": 22 + }, + { + "word": "for", + "count": 12 + }, + { + "word": "in", + "count": 12 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "on", + "count": 9 + }, + { + "word": "to", + "count": 7 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "lessons", + "count": 6 + }, + { + "word": "rada", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "19:12 EST, 11 January 2014 .", + "length": 28 + }, + { + "text": "06:53 EST, 12 January 2014 .", + "length": 28 + }, + { + "text": "Brendan Carlin, Mail on Sunday Political Reporter .", + "length": 51 + }, + { + "text": "Treasury officials refused to give details on what the lessons involved.", + "length": 72 + }, + { + "text": "The Department for International Development spent £2,370 on coaching in 2012/13.", + "length": 82 + }, + { + "text": "But Whitehall officials insisted the coaching was for civil servants – not for Ministers.", + "length": 91 + }, + { + "text": "George Osbourne and other Minister splashed out £10,000 on lessons from the acting school .", + "length": 
92 + }, + { + "text": "Neither the Government departments nor Rada would yesterday say exactly what had been taught in the lessons.", + "length": 108 + }, + { + "text": "Rada Enterprises, which provides the courses, lists eight other ministries and public bodies as ‘clients’.", + "length": 110 + }, + { + "text": "Figures revealed thousands of taxpayers' money was spent on lessons at the Royal Academy of Dramatic Art in London .", + "length": 116 + }, + { + "text": "’ A spokeswoman for Rada Enterprises insisted it provided ‘communication skills training’ – not acting lessons.", + "length": 119 + }, + { + "text": "They paid the world-famous Royal Academy of Dramatic Art (Rada) for actors and voice experts in a bid to boost their performance.", + "length": 129 + }, + { + "text": "The Department of Health spent just over £3,500 for three courses in 2012 while the Cabinet Office ran up a £675 bill in December, 2012.", + "length": 138 + }, + { + "text": "Mr Osborne famously took voice-coaching from a £100-an-hour Harley Street expert to improve his own image when the Tories were in Opposition.", + "length": 142 + }, + { + "text": "Embarrassingly for Mr Osborne, details of the Treasury’s own £3,000 drama bill emerged just days after he warned of the need for fresh cuts.", + "length": 143 + }, + { + "text": "The Mail on Sunday revealed in 2008 that Mr Osborne had received help from leading speech therapist Valerie Savage to make him sound less ‘posh’.", + "length": 149 + }, + { + "text": "According to Government credit card records, the Treasury last year paid for two sets of Rada courses –  £1,541 in October and £1,500 in February.", + "length": 151 + }, + { + "text": "She added: ‘Communication is a core skill for in any professional environment and our training increases effectiveness in meetings, panels and presentations.", + "length": 159 + }, + { + "text": "Last night, a government spokesman said: ‘Civil servants need the right skills to perform at the highest level 
and deliver better, more efficient services for the public.", + "length": 172 + }, + { + "text": "Rada, which helped launched the careers of Sir John Gielgud and Sir Kenneth Branagh, offers ‘role-playing’ with actors, voice-coaching and tips to boost ‘confidence and gravitas’.", + "length": 187 + }, + { + "text": "Chancellor George Osborne and other Coalition Ministers splashed out £10,000 of taxpayers’ money on acting lessons from Britain’s premier drama school at the same time as overseeing deep public-spending cuts.", + "length": 213 + }, + { + "text": "’ But Jonathan Isaby, chief executive of the Taxpayers’ Alliance, said: ‘When Ministers are rightly looking for extensive savings so that resources can be focused on delivering essential frontline services, people will be astounded that they are still splashing out on this kind of coaching.", + "length": 297 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5129166841506958 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:16.662778037Z", + "first_section_created": "2025-12-23T09:33:16.664235897Z", + "last_section_published": "2025-12-23T09:33:16.664430905Z", + "all_results_received": "2025-12-23T09:33:16.735842148Z", + "output_generated": "2025-12-23T09:33:16.736036656Z", + "total_processing_time_ms": 73, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 71, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:16.664235897Z", + "publish_time": "2025-12-23T09:33:16.664430905Z", + "first_worker_start": "2025-12-23T09:33:16.664915325Z", + "last_worker_end": "2025-12-23T09:33:16.734881Z", + "total_journey_time_ms": 70, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:16.664906825Z", + "start_time": "2025-12-23T09:33:16.664963227Z", + "end_time": "2025-12-23T09:33:16.665066931Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:16.665168Z", + "start_time": "2025-12-23T09:33:16.665329Z", + "end_time": "2025-12-23T09:33:16.734881Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 69 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:16.664843322Z", + "start_time": "2025-12-23T09:33:16.664915325Z", + "end_time": "2025-12-23T09:33:16.665061531Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:16.664851822Z", + "start_time": "2025-12-23T09:33:16.664920625Z", + "end_time": "2025-12-23T09:33:16.664945626Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 69, + "min_processing_ms": 69, + "max_processing_ms": 69, + "avg_processing_ms": 69, + "median_processing_ms": 69, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2832, + "slowest_section_id": 0, + "slowest_section_time_ms": 70 + } +} diff --git 
a/data/output/002455497cec3b3370399edca88fbba8c6ba93d6.json b/data/output/002455497cec3b3370399edca88fbba8c6ba93d6.json new file mode 100644 index 0000000..02abe35 --- /dev/null +++ b/data/output/002455497cec3b3370399edca88fbba8c6ba93d6.json @@ -0,0 +1,424 @@ +{ + "file_name": "002455497cec3b3370399edca88fbba8c6ba93d6.txt", + "total_words": 1100, + "top_n_words": [ + { + "word": "the", + "count": 79 + }, + { + "word": "in", + "count": 42 + }, + { + "word": "of", + "count": 37 + }, + { + "word": "a", + "count": 34 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "s", + "count": 17 + }, + { + "word": "as", + "count": 14 + }, + { + "word": "to", + "count": 14 + }, + { + "word": "on", + "count": 13 + }, + { + "word": "people", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "conflict.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "Some victims had no eyes.", + "length": 25 + }, + { + "text": "12:20 EST, 21 January 2014 .", + "length": 28 + }, + { + "text": "11:44 EST, 21 January 2014 .", + "length": 28 + }, + { + "text": "Emergency services in Aleppo.", + "length": 29 + }, + { + "text": "'Then we saw people on the ground - like every time.", + "length": 52 + }, + { + "text": "Lebanese citizens gather at the site of a car bombing.", + "length": 54 + }, + { + "text": "Iran is the chief patron of Hezbollah and an ally of Syria.", + "length": 59 + }, + { + "text": "Assad denies the claims, insisting he is fighting terrorists.", + "length": 61 + }, + { + "text": "violence has targeted both Sunnis and Shiites, and further stoked .", + "length": 67 + }, + { + "text": "Lebanon's state media said a suicide car bomber was behind the attack.", + "length": 70 + }, + { + "text": "sectarian 
tensions that are already running high on opposing sides of the Syrian .", + "length": 82 + }, + { + "text": "A December car bombing in Beirut killed prominent Sunni politician Mohammed Chatah.", + "length": 83 + }, + { + "text": "Thousands lined the street as the dead and injured were stretchered from the scene .", + "length": 84 + }, + { + "text": "Another attack in November targeted the Iranian Embassy and killed at least 23 people.", + "length": 86 + }, + { + "text": "The claim, which could not be independently verified, was posted on the group's Twitter account.", + "length": 96 + }, + { + "text": "The attacks raise the specter of a sharply divided Lebanon being dragged further into the Syrian conflict.", + "length": 106 + }, + { + "text": "Its name suggested ties to the al-Qaida-linked Nusra Front in Syria, one of the most powerful rebel factions.", + "length": 109 + }, + { + "text": "'There was a car beeping, and then it exploded,' an unnamed eyewitness told the Voice of Lebanon radio station.", + "length": 111 + }, + { + "text": "And on January 2, a bombing in Haret Hreik just meters (yards) from where Tuesday's attack occurred killed five people.", + "length": 119 + }, + { + "text": "Lebanese firefighters extinguish fire from burning cars following an explosion in Haret Hreik, a south Beirut neighbourhood .", + "length": 125 + }, + { + "text": "The attack came as fresh pictures emerged from the northern Syrian city of Aleppo after an alleged airstrike by government forces.", + "length": 130 + }, + { + "text": "A car bomb ripped through a Shia neighbourhood in Beirut today, killing at least four people as Syria’s civil war spilled over the border.", + "length": 140 + }, + { + "text": "The car bomb ripped through a Shia neighbourhood in Beirut today, killing at least four people as Syria's civil war spilled over the border .", + "length": 141 + }, + { + "text": "The city, with impoverished rival Sunni and Shiite areas, has seen frequent sectarian clashes 
linked to the war Syria that have killed dozens.", + "length": 142 + }, + { + "text": "A Hezbollah member collects pieces of a destroyed vehicle at the site of the bomb blast in the Haret Hreik area at southern suburb of Beirut .", + "length": 142 + }, + { + "text": "At least four people have been killed in the explosion in a stronghold of the Shiite militant group Hezbollah in the Lebanese capital Beirut .", + "length": 142 + }, + { + "text": "The attack came as fresh pictures emerged from the northern Syrian city of Aleppo in the aftermath of an alleged airstrike by government forces.", + "length": 144 + }, + { + "text": "Shortly after Tuesday's bombing, clashes broke out in the northern Lebanese city of Tripoli, killing at least one person, said the security official.", + "length": 149 + }, + { + "text": "Lebanese emergency personnel carry a body on stretcher from the scene of an apparent suicide car bombing in Beirut's southern neighbourhood of Haret Hreik .", + "length": 156 + }, + { + "text": "The latest fighting unraveled a tenuous truce in effect earlier in the morning, following clashes that broke out between the rival neighborhoods on Saturday.", + "length": 157 + }, + { + "text": "Footage broadcast by the Hezbollah-owned al-Manar television station showed medics hauling a man on a stretcher out of the area as flames engulfed a building.", + "length": 158 + }, + { + "text": "Lebanon's Sunni community has also been hit, most notably by a deadly double car bombing outside Sunni mosques in the northern Lebanese city of Tripoli in August.", + "length": 162 + }, + { + "text": "The shocking images show emaciated corpses with strangulation marks, cuts, bruising and signs of electrocution – evidence of extreme torture, claim investigators.", + "length": 164 + }, + { + "text": "A man runs as he carries a child who survived from what activists say was an airstrike by forces loyal to Syrian President Bashar al-Assad, at al-Ferdaws in Aleppo .", + "length": 165 + }, 
+ { + "text": "At least another 30 people died earlier in the week in violence across the country, and it was the second bombing in the Lebanese neighborhood of Haret Hreik this month.", + "length": 169 + }, + { + "text": "On Thursday, a car bomb struck the northeastern Shiite town of Hermel close to the Syrian border during rush hour, killing at least three people and wounding more than 20.", + "length": 171 + }, + { + "text": "Today's explosion on a crowded street left 35 people wounded as it shattered shop windows, set cars ablaze and left debris across the pavement according to the Lebanese Red Cross.", + "length": 179 + }, + { + "text": "More than 130,000 people have been killed in Syria's nearly three-year war, the Syrian Observatory for Human Rights estimates, and millions more have been forced to flee their homes .", + "length": 183 + }, + { + "text": "It was the second bombing in the neighborhood of Haret Hreik this month amid a series of attacks that have shaken Lebanon in a spillover of Syria's civil war into its smaller neighbor.", + "length": 184 + }, + { + "text": "The 55,000 photos will ratchet up the pressure on President Bashar Al Assad who the US and its Western allies – including the UK – say has committed war crimes against his own people.", + "length": 187 + }, + { + "text": "It comes as a military police photographer has smuggled out of Syria evidence of the torture and killing of 11,000 detainees, according to a report by three former war crimes prosecutors.", + "length": 187 + }, + { + "text": "A group known as the Nusra Front in Lebanon claimed responsibility for the attack, saying it was in retaliation for Hezbollah's military support of President Bashar Assad's forces in Syria.", + "length": 189 + }, + { + "text": "Today's explosion in Beirut on a crowded street left 35 people wounded as it shattered shop windows, set cars ablaze and left debris across the pavement according to the Lebanese Red Cross .", + "length": 190 + }, + { + "text": 
"A group known as the Nusra Front in Lebanon claimed responsibility for the attack, saying it was in retaliation for Hezbollah's military support of President Bashar Assad's forces in Syria .", + "length": 190 + }, + { + "text": "A security official, speaking on condition of anonymity because he was not authorized to talk to the media, said the vehicle was stolen and packed with 20 kilograms (44 pounds) of explosives.", + "length": 191 + }, + { + "text": "Syrians help an injured child following an alleged airstrike by government forces near a school in the Syrian city of Aleppo (left) and (right) Hezbollah members inspect damaged cars in Beirut .", + "length": 194 + }, + { + "text": "Flames could be seen pouring from a multi-storey building as firefighters tried desperately to extinguish the flames, and thousands lined the street as the dead and injured were stretchered from the scene.", + "length": 205 + }, + { + "text": "The blast is the sixth in a string targeting areas considered strongholds of Hezbollah since the group announced it was sending fighters to support President Bashar al-Assad's troops in neighbouring Syria .", + "length": 206 + }, + { + "text": "apparently the work of Syria-based Sunni rebels or militant Islamist groups fighting to topple Assad who have threatened to target Hezbollah strongholds in Lebanon in retaliation for intervening on behalf of his government in the conflict.", + "length": 239 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8291600346565247 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:17.165572755Z", + "first_section_created": "2025-12-23T09:33:17.166930811Z", + "last_section_published": "2025-12-23T09:33:17.167280025Z", + "all_results_received": "2025-12-23T09:33:17.261050189Z", + "output_generated": "2025-12-23T09:33:17.261260598Z", + "total_processing_time_ms": 95, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 93, + 
"aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:17.166930811Z", + "publish_time": "2025-12-23T09:33:17.16715052Z", + "first_worker_start": "2025-12-23T09:33:17.167699142Z", + "last_worker_end": "2025-12-23T09:33:17.260201Z", + "total_journey_time_ms": 93, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:17.167699442Z", + "start_time": "2025-12-23T09:33:17.167776546Z", + "end_time": "2025-12-23T09:33:17.167851449Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:17.167869Z", + "start_time": "2025-12-23T09:33:17.167998Z", + "end_time": "2025-12-23T09:33:17.260201Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 92 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:17.167698442Z", + "start_time": "2025-12-23T09:33:17.167781746Z", + "end_time": "2025-12-23T09:33:17.167904551Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:17.16763514Z", + "start_time": "2025-12-23T09:33:17.167699142Z", + "end_time": "2025-12-23T09:33:17.167765245Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:17.167212222Z", + "publish_time": "2025-12-23T09:33:17.167280025Z", + "first_worker_start": "2025-12-23T09:33:17.167703142Z", + "last_worker_end": "2025-12-23T09:33:17.237472Z", + "total_journey_time_ms": 70, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:17.167826648Z", + "start_time": "2025-12-23T09:33:17.167869349Z", + "end_time": "2025-12-23T09:33:17.167898351Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": 
"2025-12-23T09:33:17.16813Z", + "start_time": "2025-12-23T09:33:17.168272Z", + "end_time": "2025-12-23T09:33:17.237472Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 69 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:17.167766545Z", + "start_time": "2025-12-23T09:33:17.167802147Z", + "end_time": "2025-12-23T09:33:17.167845848Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:17.167664141Z", + "start_time": "2025-12-23T09:33:17.167703142Z", + "end_time": "2025-12-23T09:33:17.167738044Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 161, + "min_processing_ms": 69, + "max_processing_ms": 92, + "avg_processing_ms": 80, + "median_processing_ms": 92, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3311, + "slowest_section_id": 0, + "slowest_section_time_ms": 93 + } +} diff --git a/data/output/00248e4e65717e00d45d95bb84e9c056a04d5e09.json 
b/data/output/00248e4e65717e00d45d95bb84e9c056a04d5e09.json new file mode 100644 index 0000000..3de44b6 --- /dev/null +++ b/data/output/00248e4e65717e00d45d95bb84e9c056a04d5e09.json @@ -0,0 +1,412 @@ +{ + "file_name": "00248e4e65717e00d45d95bb84e9c056a04d5e09.txt", + "total_words": 930, + "top_n_words": [ + { + "word": "the", + "count": 59 + }, + { + "word": "a", + "count": 23 + }, + { + "word": "to", + "count": 22 + }, + { + "word": "in", + "count": 21 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "of", + "count": 17 + }, + { + "word": "on", + "count": 16 + }, + { + "word": "said", + "count": 15 + }, + { + "word": "brown", + "count": 14 + }, + { + "word": "police", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "9.", + "length": 2 + }, + { + "text": "St.", + "length": 3 + }, + { + "text": "Louis.", + "length": 6 + }, + { + "text": "Louis area.", + "length": 11 + }, + { + "text": "This is a test.", + "length": 15 + }, + { + "text": "Brown was black and unarmed.", + "length": 28 + }, + { + "text": "Highway Patrol Spokesman Lt.", + "length": 28 + }, + { + "text": "You must disperse immediately.", + "length": 30 + }, + { + "text": "'These people aren't from here.", + "length": 31 + }, + { + "text": "It made emotions raw,' he said.", + "length": 31 + }, + { + "text": "Officer Darren Wilson is white.", + "length": 31 + }, + { + "text": "'We must first have and maintain peace.", + "length": 39 + }, + { + "text": "'Who would burn down their own backyard?", + "length": 40 + }, + { + "text": "They came to burn down our city and leave.", + "length": 42 + }, + { + "text": "9 by a white Ferguson officer, Darren Wilson.", + "length": 45 + }, + { + "text": "The teen, who was about to start college, was unarmed .", + "length": 55 + }, + { + "text": "Fallon says the autopsy will take place as soon as possible.", + "length": 60 + }, + { + "text": "' A moment later, police began firing canisters into the crowd.", + "length": 63 + }, + { + "text": "Wilson 
has been on paid administrative leave since the shooting.", + "length": 64 + }, + { + "text": "Brown's death has ignited days of clashes with furious protesters .", + "length": 67 + }, + { + "text": "Swat vans came down the hill with cops on the top with assault rifles .", + "length": 71 + }, + { + "text": "Michael Brown, 18, was shot dead by a police officer on Saturday night.", + "length": 71 + }, + { + "text": "Ferguson police officer: Darren Wilson, 28, seen in a photo released by Yahoo!", + "length": 78 + }, + { + "text": "' asked Rebecca McCloud, a local who works with the Sonshine Baptist Church in St.", + "length": 82 + }, + { + "text": "Protests have been going on since 18-year-old Michael Brown was shot and killed Aug.", + "length": 84 + }, + { + "text": "'It's appeared to cast aspersions on a young man that was gunned down in the street.", + "length": 84 + }, + { + "text": "The 18-year-old Brown was shot and killed by a Ferguson, Missouri, police officer on Aug.", + "length": 89 + }, + { + "text": "'Obviously, we're trying to give them every opportunity to comply with the curfew,' Hotz said.", + "length": 94 + }, + { + "text": "Louis County prosecutor Bob McCulloch said it could be weeks before the investigation wraps up.", + "length": 95 + }, + { + "text": "On guard: Dozens of heavily armed officers with batons, shields, helmets, assembled in grid formation.", + "length": 102 + }, + { + "text": "A law enforcement officer watches as tear gas is fired to disperse a crowd protesting the shooting of Brown .", + "length": 109 + }, + { + "text": "He also said the Justice Department will still take the state's autopsy into account during the investigation.", + "length": 110 + }, + { + "text": "' As officers put on gas masks, a chant from the distant crowd emerged: 'We have the right to assemble peacefully.", + "length": 114 + }, + { + "text": "John Hotz initially said police only used smoke, but later told The Associated Press they also used tear gas 
canisters.", + "length": 119 + }, + { + "text": "Local officers faced strong criticism earlier in the week for their use of tear gas and rubber bullets against protesters.", + "length": 122 + }, + { + "text": "On Saturday, some residents said it appeared the violent acts were being committed by people from other suburbs or states.", + "length": 122 + }, + { + "text": "Discussion: President Barack Obama talks with Attorney General Eric Holder to discuss the situation in Ferguson, Missouri .", + "length": 123 + }, + { + "text": "' Wilson, the officer who shot Brown, is a six-year police veteran who had no previous complaints against him, Jackson has said.", + "length": 128 + }, + { + "text": "News receiving a commendation for 'extraordinary effort in the line of duty' in February has been named as the police officer who shot Brown .", + "length": 142 + }, + { + "text": "Slain in broad daylight: This image provided by KMOV-TV shows investigators inspecting the body of Brown on August 9, where he was shot in Ferguson.", + "length": 148 + }, + { + "text": "US Attorney General Eric Holder has asked for the Justice Department to arrange an autopsy on the body of Michael Brown by a federal medical examiner.", + "length": 150 + }, + { + "text": "In announcing the curfew, Nixon said that though many protesters were making themselves heard peacefully, the state would not allow looters to endanger the community.", + "length": 166 + }, + { + "text": "Johnson said earlier Saturday that police would not enforce the curfew with armored trucks and tear gas but would communicate with protesters and give them ample opportunity to leave.", + "length": 183 + }, + { + "text": "The eyes of the world are watching,' Nixon said during a news conference that was interrupted repeatedly by people objecting to the curfew and demanding Wilson be charged with murder.", + "length": 183 + }, + { + "text": "But as the curfew deadline arrived early Sunday, remaining protesters refused to leave the 
area as officers spoke through a loudspeaker: 'You are in violation of a state-imposed curfew.", + "length": 185 + }, + { + "text": "The US Department of Justice also has deepened its civil rights investigation of the shooting, as Johnson said Saturday that 40 FBI agents were going door-to-door in the neighborhood gathering information.", + "length": 205 + }, + { + "text": "Hands up: As midnight arrived a crowd were on the corner of Canfield Drive and West Florissant Avenue in the St Louis suburb in Missouri, shouting and screaming at the night despite calls for then to go home .", + "length": 209 + }, + { + "text": "The Ferguson Police Department has refused to say anything about Wilson's whereabouts, and Associated Press reporters were unable to contact him at any addresses or phone numbers listed under that name in the St.", + "length": 212 + }, + { + "text": "Justice Department spokesman Brian Fallon said in a news release on Sunday that Holder asked for the additional autopsy because of the \"extraordinary circumstances involved in this case\" and at the request of Brown's family.", + "length": 224 + }, + { + "text": "Making their point: Some demonstrators stood with their hands up early on Sunday morning, the emblematic pose used by many protesters to characterize the position witness have said Brown had assumed when he was fatally shot .", + "length": 225 + }, + { + "text": "Governor Jay Nixon, who declared a state of emergency in Ferguson on Saturday after protests turned violent the night before, said Sunday morning on ABC's 'This Week' that he was not aware the police were going to release the surveillance video.", + "length": 245 + }, + { + "text": "The death heightened racial tensions between the predominantly black community and mostly white Ferguson Police Department, leading to several run-ins between police and protesters and prompting Missouri's governor to put the Highway Patrol in charge of security.", + "length": 263 + }, + { + "text": "The 
Ferguson Police Department waited six days to publicly reveal the name of the officer and documents alleging Brown robbed a convenience store before he was killed, though Ferguson Police Chief Thomas Jackson said Wilson did not know Brown was a suspect when he encountered him walking in the street with a friend.", + "length": 317 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.720743864774704 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:17.66805646Z", + "first_section_created": "2025-12-23T09:33:17.668410474Z", + "last_section_published": "2025-12-23T09:33:17.668910795Z", + "all_results_received": "2025-12-23T09:33:17.797920111Z", + "output_generated": "2025-12-23T09:33:17.798124019Z", + "total_processing_time_ms": 130, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 129, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:17.668410474Z", + "publish_time": "2025-12-23T09:33:17.668721187Z", + "first_worker_start": "2025-12-23T09:33:17.669258809Z", + "last_worker_end": "2025-12-23T09:33:17.771642Z", + "total_journey_time_ms": 103, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:17.669290011Z", + "start_time": "2025-12-23T09:33:17.669361113Z", + "end_time": "2025-12-23T09:33:17.669453417Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:17.669555Z", + "start_time": "2025-12-23T09:33:17.669701Z", + "end_time": "2025-12-23T09:33:17.771642Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 101 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:17.669213407Z", + "start_time": "2025-12-23T09:33:17.669309611Z", + "end_time": "2025-12-23T09:33:17.669438917Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + 
"worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:17.669178906Z", + "start_time": "2025-12-23T09:33:17.669258809Z", + "end_time": "2025-12-23T09:33:17.669312511Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:17.668806791Z", + "publish_time": "2025-12-23T09:33:17.668910795Z", + "first_worker_start": "2025-12-23T09:33:17.669256909Z", + "last_worker_end": "2025-12-23T09:33:17.796682Z", + "total_journey_time_ms": 127, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:17.669391715Z", + "start_time": "2025-12-23T09:33:17.669437717Z", + "end_time": "2025-12-23T09:33:17.669455617Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:17.669585Z", + "start_time": "2025-12-23T09:33:17.669702Z", + "end_time": "2025-12-23T09:33:17.796682Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 126 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:17.669345213Z", + "start_time": "2025-12-23T09:33:17.669454117Z", + "end_time": "2025-12-23T09:33:17.669474818Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:17.669226008Z", + "start_time": "2025-12-23T09:33:17.669256909Z", + "end_time": "2025-12-23T09:33:17.66926571Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 227, + "min_processing_ms": 101, + "max_processing_ms": 
126, + "avg_processing_ms": 113, + "median_processing_ms": 126, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2796, + "slowest_section_id": 1, + "slowest_section_time_ms": 127 + } +} diff --git a/data/output/0024a4404120db4544060c11050acaaae185f315.json b/data/output/0024a4404120db4544060c11050acaaae185f315.json new file mode 100644 index 0000000..64565cf --- /dev/null +++ b/data/output/0024a4404120db4544060c11050acaaae185f315.json @@ -0,0 +1,310 @@ +{ + "file_name": "0024a4404120db4544060c11050acaaae185f315.txt", + "total_words": 802, + "top_n_words": [ + { + "word": "the", + "count": 45 + }, + { + "word": "to", + "count": 29 + }, + { + "word": "in", + "count": 22 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "obama", + "count": 12 + }, + { + "word": "s", + "count": 11 + }, + { + "word": "may", + "count": 10 + }, + { + "word": "romney", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "Jacobs.", + "length": 7 + }, + { + "text": "What gives?", + "length": 11 + }, + { + "text": "Ditto on youth.", + "length": 15 + }, + { + "text": "Give Romney some credit.", + "length": 24 + }, + { + "text": "He's made a shrewd move.", + "length": 24 + }, + { + "text": "Bush the independent vote.", + "length": 26 + }, + { + "text": "But -- there's always a but.", + "length": 28 + }, + { + "text": "Conservatives 
outnumber liberals 2 to 1 (40% to 21%).", + "length": 53 + }, + { + "text": "Embracing the base and scorning the rush to the middle cost George W.", + "length": 69 + }, + { + "text": "Did the looming prospect of defeat push Romney into a desperate gamble?", + "length": 71 + }, + { + "text": "The opinions expressed in this commentary are solely those of Lawrence R.", + "length": 73 + }, + { + "text": "Strap in, folks, 2012 may be much more interesting and close than we'd imagined.", + "length": 80 + }, + { + "text": "Rage against Obama has the GOP ready to walk over red hot coals to cast a ballot.", + "length": 81 + }, + { + "text": "But Bush also supercharged conservatives and Republicans, who turned out in droves.", + "length": 83 + }, + { + "text": "Alarmed by his draconian proposals to remake Medicare, they may boost their support of Obama.", + "length": 93 + }, + { + "text": "Just the opposite -- Romney has doubled down on his move to the right during the primary battle.", + "length": 96 + }, + { + "text": "But using Ryan to ignite the Republican base is probably Romney's most plausible path to prevailing.", + "length": 100 + }, + { + "text": "The Ryan choice adopts a strategy premised on supermobilizing the base and luring a smidgeon of others.", + "length": 103 + }, + { + "text": "Put on your thinking caps and grab an abacus, here are the numbers that could put Romney in the White House.", + "length": 108 + }, + { + "text": "Another potential risk: A good number of voters may be primed to punish the incumbent for poor economic times.", + "length": 110 + }, + { + "text": "But the coming hullabaloo over Ryan's budget proposals may distract the economically pained from punishing Obama.", + "length": 113 + }, + { + "text": "Pluralities of Ohio and Florida independents report that Obama's re-election would hurt their personal financial situation.", + "length": 123 + }, + { + "text": "Even as Ryan fires up conservatives, he may also mobilize votes for Obama -- 
including senior citizens who reside in key swing states like Florida.", + "length": 147 + }, + { + "text": "And, it may produce a campaign focused a bit more on policy than on birth certificates, service records and the other side issues of recent elections.", + "length": 150 + }, + { + "text": "Refuting the conventional wisdom that Democrats do best in high-turnout elections, it was Bush who most benefited from the 16% jump in the total vote.", + "length": 150 + }, + { + "text": "All in all, Romney has a tough battle ahead -- even stringent counts of Electoral College votes based on polls show Obama within striking distance of winning.", + "length": 158 + }, + { + "text": "Even with Obama's pro-immigration shift and the growing number of Latinos in competitive states, their actual turnout may flag from their record numbers in 2008.", + "length": 161 + }, + { + "text": "Blue collar voters -- never drawn to Obama (think Hillary Clinton in 2008 Democratic primaries) -- may desert him in numbers that approach the \"Reagan Democrat\" defections in 1980.", + "length": 180 + }, + { + "text": "This possible weakness in the Democratic coalition coincides with a bit more slippage among Obama's 2008 supporters (9%) than among McCain voters who won't vote GOP in November (5%).", + "length": 182 + }, + { + "text": "Ryan may be many things -- energetic, charismatic and geeky -- but no one familiar with his Full Monty conservative budgets would describe his selection as remotely moving to the center.", + "length": 186 + }, + { + "text": "Less than half of Hispanics eligible to vote are registering and only 64% of Hispanics say they will definitely vote as compared to their 77% response in 2008 and the national average of 78% today.", + "length": 197 + }, + { + "text": "To make sure they harvest the Ryan enthusiasts, the Romney campaign appears to be assembling an impressive operation to turn out the vote and to aggressively compete with the Obama team for the early vote.", + 
"length": 205 + }, + { + "text": "Bottom line: By picking the bona fide conservative Ryan, the Republican base is likely to deliver a rapturous response, which may allow Romney to succeed in exploiting Obama's greatest weakness at this point.", + "length": 208 + }, + { + "text": "The percentage of voters 18 to 29 who say they will definitely vote in November (58%) is currently running 20 points or more behind the national average today (78%) or the youth turnout in 2008 (78%) or 2004 (81%).", + "length": 214 + }, + { + "text": "Before you conclude this is far-fetched, think back to Karl Rove's strategy in 2004 to move right with strident social conservatism on abortion and same-sex marriage, steep tax cuts and hawkish policies in Afghanistan and Iraq.", + "length": 227 + }, + { + "text": "What makes the Romney mobilization particularly threatening to Obama is that it targets his biggest challenge -- polls consistently show him ahead but there are ominous signs that a decisive group of those supporters won't actually cast a ballot.", + "length": 246 + }, + { + "text": "A mainstay of Gallup's measure for determining who is likely to vote -- whether survey respondents are thinking a lot about the election -- shows not only that Republicans are more attentive than Democrats by 13 points but also more fired up than in recent presidential elections.", + "length": 280 + }, + { + "text": "(CNN) -- By now, we're on the same page that Mitt Romney's pick of Paul Ryan as his running mate contradicts a golden oldie of presidential election strategy -- run to the conservative (or liberal) base to win the nomination and then reposition toward the center to lure the more moderate independent swing voters who are necessary to win the general election.", + "length": 360 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5087285041809082 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:18.169676729Z", + 
"first_section_created": "2025-12-23T09:33:18.169959641Z", + "last_section_published": "2025-12-23T09:33:18.17019005Z", + "all_results_received": "2025-12-23T09:33:18.230515636Z", + "output_generated": "2025-12-23T09:33:18.230734045Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:18.169959641Z", + "publish_time": "2025-12-23T09:33:18.17019005Z", + "first_worker_start": "2025-12-23T09:33:18.170632669Z", + "last_worker_end": "2025-12-23T09:33:18.229648Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:18.170618668Z", + "start_time": "2025-12-23T09:33:18.170724172Z", + "end_time": "2025-12-23T09:33:18.170826777Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:18.170843Z", + "start_time": "2025-12-23T09:33:18.170999Z", + "end_time": "2025-12-23T09:33:18.229648Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:18.170594467Z", + "start_time": "2025-12-23T09:33:18.17067817Z", + "end_time": "2025-12-23T09:33:18.170790675Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:18.170545565Z", + "start_time": "2025-12-23T09:33:18.170632669Z", + "end_time": "2025-12-23T09:33:18.17067587Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + 
"sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4779, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git a/data/output/0024cc21f33090c48c8b7d46233b705f42218b3b.json b/data/output/0024cc21f33090c48c8b7d46233b705f42218b3b.json new file mode 100644 index 0000000..b9b28b1 --- /dev/null +++ b/data/output/0024cc21f33090c48c8b7d46233b705f42218b3b.json @@ -0,0 +1,448 @@ +{ + "file_name": "0024cc21f33090c48c8b7d46233b705f42218b3b.txt", + "total_words": 1251, + "top_n_words": [ + { + "word": "the", + "count": 73 + }, + { + "word": "to", + "count": 56 + }, + { + "word": "a", + "count": 37 + }, + { + "word": "that", + "count": 27 + }, + { + "word": "of", + "count": 26 + }, + { + "word": "and", + "count": 22 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "on", + "count": 16 + }, + { + "word": "it", + "count": 15 + }, + { + "word": "press", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Not yet that is.", + "length": 16 + }, + { + "text": "19:26 EST, 17 March 2013 .", + "length": 
26 + }, + { + "text": "18:55 EST, 17 March 2013 .", + "length": 26 + }, + { + "text": "Defining the Dowler test is key.", + "length": 32 + }, + { + "text": "James Chapman and Steve Doughty .", + "length": 33 + }, + { + "text": "We can use these words against him.", + "length": 35 + }, + { + "text": "The Prime Minister cannot go back on it.", + "length": 40 + }, + { + "text": "We need to define what the Dowler test actually is.", + "length": 51 + }, + { + "text": "Any agreement must be on the basis of our Royal Charter.", + "length": 56 + }, + { + "text": "The opposition must not be allowed to define it, we must.", + "length": 57 + }, + { + "text": "We are planning to go ahead with the votes in the Commons.", + "length": 58 + }, + { + "text": "Around 20 Tories are thought to be ready to back the Lib/Lab pact.", + "length": 66 + }, + { + "text": "A senior source said: ‘We are in lock-step with the Lib Dems on this.", + "length": 71 + }, + { + "text": "To achieve this, the organisation intended to invent a ‘Dowler test’.", + "length": 73 + }, + { + "text": "’ Labour said it had not been approached over any new cross-party talks.", + "length": 74 + }, + { + "text": "The memo indicates that Hacked Off and its leaders regard such MPs with distaste.", + "length": 81 + }, + { + "text": "A family torn apart: Murdered schoolgirl Milly Dowler, 13, with her mother Sally .", + "length": 82 + }, + { + "text": "‘The Dowler family must support that definition; it is in their name that we act.", + "length": 83 + }, + { + "text": "‘Our messages will differ depending on whether these groups are left or right-leaning.", + "length": 88 + }, + { + "text": "‘Ultimately we are not about grandstanding on this; we are about getting a law that works.", + "length": 92 + }, + { + "text": "He said publishers ‘have all agreed to have a new body and a fresh start – with teeth’.", + "length": 93 + }, + { + "text": "’ The memo then says that Milly Dowler’s parents must be seen to back the Hacked 
Off test.", + "length": 94 + }, + { + "text": "It also advocated approaching disillusioned Right-wing Tory MPs to undermine Mr Cameron on his own back benches.", + "length": 112 + }, + { + "text": "‘I think their climbdown from that position has put them much closer to our position, and that is to be welcomed.", + "length": 115 + }, + { + "text": "’ Mr Miliband urged MPs to ‘stand up for the victims’ of Press intrusion by enshrining a new regulator in law.", + "length": 116 + }, + { + "text": "It said that Hacked Off itself would make up the Dowler test, and ‘the Dowler family must support that definition’.", + "length": 119 + }, + { + "text": "Both Ed Miliband and Nick Clegg have repeatedly referred to the Dowlers in their recent public statements on Press regulation.", + "length": 126 + }, + { + "text": "The document said that Hacked Off has just one aim, to establish ‘an independent statutory regulator with punitive powers’.", + "length": 127 + }, + { + "text": "‘There is still an opportunity for us to get together and get a Press law that works,’ he told the BBC’s Andrew Marr Show.", + "length": 128 + }, + { + "text": "The Hacked Off document said: ‘He (Cameron) would favour a robust form of self-regulation as long as it passed the Dowler test.", + "length": 129 + }, + { + "text": "What was proposed was ‘probably the strongest regulatory body in Europe: ability to fine; ability to carry out investigations’.", + "length": 131 + }, + { + "text": "Talks on a new Press regulation system were on a knife-edge last night as David Cameron tried to head off a damaging Commons defeat.", + "length": 132 + }, + { + "text": "Culture Secretary Maria Miller claimed a ‘climbdown’ by Mr Miliband had brought the Opposition ‘much closer to our position’.", + "length": 133 + }, + { + "text": "The memo said: ‘The expression the Dowler test has pushed its way into the popular political lexicon, when in fact it doesn’t exist.", + "length": 136 + }, + { + "text": "His deputy Harriet 
Harman, the party’s media spokesman, suggested it wanted the Press to be prevented from writing about private lives.", + "length": 137 + }, + { + "text": "But Downing Street also appeared ready to give ground in an attempt to head off what looked like inevitable defeat in Parliament tonight.", + "length": 137 + }, + { + "text": "It added that Hacked Off would say different things to different people depending on what it thought they were most likely to want to hear.", + "length": 139 + }, + { + "text": "The PM also said the present Press self-regulation system had failed families such as the Dowlers and the parents of missing Madeleine McCann .", + "length": 143 + }, + { + "text": "Chancellor George Osborne said he hoped a last-minute deal could be found, warning that a regulatory system without cross-party support was unlikely to last.", + "length": 157 + }, + { + "text": "A ‘strategy document’ drawn up by Hacked Off set out plans to draw up a ‘Dowler test’ that it could use to force David Cameron into shackling newspapers.", + "length": 161 + }, + { + "text": "The three party leaders were inching towards a deal on a Royal Charter to enshrine a powerful newspaper watchdog which would implement fines of up to £1million.", + "length": 161 + }, + { + "text": "’ Hacked Off confirmed yesterday that the document, leaked to the Mail on Sunday by a whistleblower said to be disenchanted with the group’s message, was genuine.", + "length": 166 + }, + { + "text": "Tory whips have warned the Prime Minister he is likely to face defeat thanks to combined Labour and Lib Dem votes if he presses ahead with his version of a Royal Charter.", + "length": 170 + }, + { + "text": "Yesterday, however, it emerged that Mr Cameron and his Lib Dem deputy Nick Clegg had met face to face and reopened negotiations ahead of a crunch Commons vote due tonight.", + "length": 171 + }, + { + "text": "‘They have all signed up to that and they just say “we don’t need statute; the last thing we should 
do is send a message across the world that the UK now has a Press law.", + "length": 176 + }, + { + "text": "’ The document, drawn up by Hacked Off head of campaigns Ella Mason, was presented to a meeting at the London headquarters of law firm Mishcon de Reya on 19 September last year.", + "length": 179 + }, + { + "text": "Tory sources claimed that Labour and the Liberal Democrats – who joined forces to try to force through laws shackling the 300-year-old free Press – had offered key concessions.", + "length": 180 + }, + { + "text": "’ Mrs Miller added: ‘Labour has been trying to push through a tough form of statutory regulation for the Press with really unacceptable consequences for freedom of speech in this country.", + "length": 191 + }, + { + "text": "’ The pressure group pushing for statutory Press regulation plotted to exploit the parents of murdered Milly Dowler to achieve its political aims, a leaked briefing note revealed yesterday.", + "length": 191 + }, + { + "text": "Culture Secretary Maria Miller claimed a 'climbdown' by Mr Miliband had brought the parties closer to launching a powerful newspaper watchdog which would implement fines of up to £1million .", + "length": 191 + }, + { + "text": "The exploitation of Bob and Sally Dowler – described as ‘hypocrisy and cynicism’ by senior Tory MP David Davis – is outlined in the leaked memo produced for leaders of Hacked Off in September.", + "length": 200 + }, + { + "text": "Cross-party talks on how to take forward the recommendations of Lord Justice  Leveson’s report on media standards collapsed last week when the Prime Minister lost patience with fresh Labour demands.", + "length": 201 + }, + { + "text": "The meeting was held in the office of Charlotte Harris, a lawyer representing a number of phone hacking victims, and was attended by the group’s founder Brian Cathcart and Hugh Grant, one of its directors.", + "length": 207 + }, + { + "text": "‘There are a lot of things that can turn people’s lives upside 
down that are not criminal offences and that’s what we need to ensure that we protect people from,’ she told the BBC’s Sunday Politics.", + "length": 208 + }, + { + "text": "Differences between the parties remained over key details of the new rules governing the Press, including the prominence of apologies required by a new regulator and the right of the industry to object to who sits on it.", + "length": 220 + }, + { + "text": "Press Complaints Commission chairman Lord Hunt said publishers were ready to sign up to a tough new ‘Leveson-compliant’ watchdog but believed statutory underpinning would send a dangerous signal to the world about Press freedom.", + "length": 232 + }, + { + "text": "Mr Cameron last night appeared ready to contemplate a single clause in legislation to ensure that the Royal Charter could not be amended in the future without two-thirds majorities in both Houses of Parliament and the agreement of all three party leaders.", + "length": 255 + }, + { + "text": "’ It referred to the Prime Minister’s evidence to Lord Justice Leveson last June in which Mr Cameron said that the present Press self-regulation system had failed families such as the Dowlers and the parents of missing Madeleine McCann and that was ‘the test’.", + "length": 268 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5534603744745255 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:18.670971685Z", + "first_section_created": "2025-12-23T09:33:18.671378702Z", + "last_section_published": "2025-12-23T09:33:18.671674514Z", + "all_results_received": "2025-12-23T09:33:18.764050821Z", + "output_generated": "2025-12-23T09:33:18.764306931Z", + "total_processing_time_ms": 93, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 92, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:18.671378702Z", + "publish_time": "2025-12-23T09:33:18.671594211Z", + 
"first_worker_start": "2025-12-23T09:33:18.672078331Z", + "last_worker_end": "2025-12-23T09:33:18.759691Z", + "total_journey_time_ms": 88, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:18.672125333Z", + "start_time": "2025-12-23T09:33:18.672198636Z", + "end_time": "2025-12-23T09:33:18.672339041Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:18.672425Z", + "start_time": "2025-12-23T09:33:18.672565Z", + "end_time": "2025-12-23T09:33:18.759691Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 87 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:18.672011328Z", + "start_time": "2025-12-23T09:33:18.672078331Z", + "end_time": "2025-12-23T09:33:18.672502848Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:18.672077131Z", + "start_time": "2025-12-23T09:33:18.672183435Z", + "end_time": "2025-12-23T09:33:18.672233237Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:18.671632912Z", + "publish_time": "2025-12-23T09:33:18.671674514Z", + "first_worker_start": "2025-12-23T09:33:18.672156034Z", + "last_worker_end": "2025-12-23T09:33:18.763291Z", + "total_journey_time_ms": 91, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:18.672210436Z", + "start_time": "2025-12-23T09:33:18.672250738Z", + "end_time": "2025-12-23T09:33:18.67229904Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:18.672481Z", + "start_time": "2025-12-23T09:33:18.672614Z", + "end_time": "2025-12-23T09:33:18.763291Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 90 + }, + "topn": { + 
"worker_type": "topn", + "receive_time": "2025-12-23T09:33:18.672206936Z", + "start_time": "2025-12-23T09:33:18.672247338Z", + "end_time": "2025-12-23T09:33:18.67229874Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:18.672113032Z", + "start_time": "2025-12-23T09:33:18.672156034Z", + "end_time": "2025-12-23T09:33:18.672195236Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 177, + "min_processing_ms": 87, + "max_processing_ms": 90, + "avg_processing_ms": 88, + "median_processing_ms": 90, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3677, + "slowest_section_id": 1, + "slowest_section_time_ms": 91 + } +} diff --git a/data/output/002509a01890dd51476aa84c634b6c1db306f995.json b/data/output/002509a01890dd51476aa84c634b6c1db306f995.json new file mode 100644 index 0000000..bf0a7ec --- /dev/null +++ b/data/output/002509a01890dd51476aa84c634b6c1db306f995.json @@ -0,0 +1,380 @@ +{ + "file_name": 
"002509a01890dd51476aa84c634b6c1db306f995.txt", + "total_words": 900, + "top_n_words": [ + { + "word": "the", + "count": 72 + }, + { + "word": "of", + "count": 31 + }, + { + "word": "in", + "count": 30 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "rights", + "count": 18 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "that", + "count": 15 + }, + { + "word": "human", + "count": 14 + }, + { + "word": "statute", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "rights?", + "length": 7 + }, + { + "text": "From Franklin D.", + "length": 16 + }, + { + "text": "But the larger question is: Does the U.", + "length": 39 + }, + { + "text": "In the wake of the case, Filártiga v.", + "length": 39 + }, + { + "text": "The United States stands at a crossroads.", + "length": 41 + }, + { + "text": "The Supreme Court court accepted Kiobel v.", + "length": 42 + }, + { + "text": "away from its once leading advocacy for human rights.", + "length": 53 + }, + { + "text": "Royal Dutch Petroleum will have enormous significance.", + "length": 55 + }, + { + "text": "Shell Oil must aid Nigeria workers who were tortured, abused .", + "length": 62 + }, + { + "text": "They ordered the case to be re-argued on exactly that question.", + "length": 63 + }, + { + "text": "(CNN) -- An argument before the Supreme Court on October 1 in Kiobel v.", + "length": 72 + }, + { + "text": "Passed in 1789, the Alien Tort Statute was a prescient piece of legislation.", + "length": 76 + }, + { + "text": "The opinions expressed in this commentary are solely those of Vincent Warren.", + "length": 77 + }, + { + "text": "It is this legacy that is at stake in the Kiobel case before the Supreme Court.", + "length": 81 + }, + { + "text": "Peña-Irala, the Alien Tort Statute developed into a new tool in human rights law.", + "length": 
82 + }, + { + "text": "The young man had been tortured to death because his father opposed the government.", + "length": 83 + }, + { + "text": "Alfredo Stroessner, brought and won a civil case against his murderer, Americo Peña-Irala.", + "length": 91 + }, + { + "text": "The international leadership of the past century is a long way from where we find ourselves now.", + "length": 96 + }, + { + "text": "It allows foreign victims of human rights abuses in foreign nations to seek civil remedies in U.", + "length": 96 + }, + { + "text": "Since 1977, the State Department has annually produced Country Reports on Human Rights Practices.", + "length": 97 + }, + { + "text": "Kiobel was executed through a sham trial process in which the plaintiffs believe Shell played a central role.", + "length": 109 + }, + { + "text": "Royal Dutch Petroleum last fall after a federal appeals court ruled that the statute could not be used to sue corporations.", + "length": 123 + }, + { + "text": "At its best, our nation has played a crucial role in championing human rights throughout the world and pioneering human rights law.", + "length": 131 + }, + { + "text": "On this grim and morally and legally compromised horizon, the Alien Tort Statute is still one bright spot for human rights advocacy.", + "length": 132 + }, + { + "text": "All the plaintiffs were themselves tortured except Esther Kiobel, who brought her claims on behalf of her late husband, Barinem Kiobel.", + "length": 135 + }, + { + "text": "The ruling established that the statute could be used to hold modern torturers accountable for their actions, wherever they are committed.", + "length": 138 + }, + { + "text": "\"The United States is abandoning its role as the global champion of human rights,\" Jimmy Carter wrote bluntly in The New York Times in June.", + "length": 140 + }, + { + "text": "If the statute is narrowed and its promise of universal accountability curtailed, it will rightly be perceived as yet another step by the 
U.", + "length": 140 + }, + { + "text": "In a groundbreaking case in 1980, the family of a 17-year-old Paraguayan, Joelito Filártiga, who had been tortured and killed by a henchman of Gen.", + "length": 148 + }, + { + "text": "The case concerns the torture of Ogoni leaders in Nigeria, but at stake is the future of the law under which this case was brought, the Alien Tort Statute.", + "length": 155 + }, + { + "text": "At its worst, it has abandoned its lofty ideals in the name of realpolitik and supported dictators and policies that were responsible for horrible abuses.", + "length": 156 + }, + { + "text": "A popular movement of the Ogoni people resisting what they saw as reckless oil development in the region was violently suppressed by Nigeria's military dictatorship.", + "length": 165 + }, + { + "text": "courts, and its animating idea -- that people anywhere should have recourse for violations of the \"law of nations\" -- was the foundation of our modern understanding of human rights.", + "length": 181 + }, + { + "text": "The case has been brought in the United States because of our nation's historical role in promoting the idea of universal rights and in the development of international human rights law.", + "length": 186 + }, + { + "text": "In the 1990s, Royal Dutch Petroleum (Shell) had extensive oil drilling operations in the Niger Delta in Nigeria, a region long plagued with poverty, human rights violations and environmental disaster.", + "length": 200 + }, + { + "text": "Our own era is defined by a different legacy: one of waterboarding and \"torture memos,\" extraordinary renditions, indefinite detention at Guantánamo Bay and targeted killings in countries with which we are not at war.", + "length": 218 + }, + { + "text": "Successful cases were brought against government officials, against non-state actors like Radovan Karadžić in Bosnia-Herzegovina and against multinational corporations before the Second Circuit ruling in Kiobel that disallowed that.", + 
"length": 234 + }, + { + "text": "If, on the other hand, the Supreme Court upholds the Alien Tort Statute, it will signal to the world that we do still believe that people everywhere are entitled to certain fundamental rights and that we will help enforce those rights.", + "length": 235 + }, + { + "text": "The justices indicated in February that they might question not just the application of the statute to corporations but whether and under what circumstances it applies to any human rights violations, even by individuals, that take place outside the United States.", + "length": 263 + }, + { + "text": "In the suit, the plaintiffs accuse Royal Dutch Shell of helping the former dictatorship in the arrests on false charges and torture of 12 members of the Ogoni tribe, who sought to peacefully disrupt Shell's operations because of the devastating health and environmental effects of unregulated drilling.", + "length": 302 + }, + { + "text": "The immediate questions before the court on October 1 concern the reach of the Alien Tort Statute and whether it will continue to be possible for people like the Filártigas and the Kiobels to pursue their tormentors and hold them accountable for their heinous acts, and whether corporations can be held to account.", + "length": 315 + }, + { + "text": "Roosevelt's Four Freedoms speech and the Universal Declaration of Human Rights that Eleanor Roosevelt tirelessly worked for, to the stirring oratory of Robert Jackson at the Nuremberg Tribunal, mid-century Americans gave voice and visibility to the idea that all people, everywhere, were entitled to certain fundamental rights.", + "length": 327 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5290754288434982 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:19.172433248Z", + "first_section_created": "2025-12-23T09:33:19.172769962Z", + "last_section_published": "2025-12-23T09:33:19.173304084Z", + "all_results_received": 
"2025-12-23T09:33:19.295933937Z", + "output_generated": "2025-12-23T09:33:19.296135745Z", + "total_processing_time_ms": 123, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 122, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:19.172769962Z", + "publish_time": "2025-12-23T09:33:19.173103076Z", + "first_worker_start": "2025-12-23T09:33:19.173516693Z", + "last_worker_end": "2025-12-23T09:33:19.244007Z", + "total_journey_time_ms": 71, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:19.173535493Z", + "start_time": "2025-12-23T09:33:19.173613097Z", + "end_time": "2025-12-23T09:33:19.173729701Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:19.17388Z", + "start_time": "2025-12-23T09:33:19.174021Z", + "end_time": "2025-12-23T09:33:19.244007Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 69 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:19.173598396Z", + "start_time": "2025-12-23T09:33:19.173681399Z", + "end_time": "2025-12-23T09:33:19.173797804Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:19.173437489Z", + "start_time": "2025-12-23T09:33:19.173516693Z", + "end_time": "2025-12-23T09:33:19.173555194Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:19.17320018Z", + "publish_time": "2025-12-23T09:33:19.173304084Z", + "first_worker_start": "2025-12-23T09:33:19.173702Z", + "last_worker_end": "2025-12-23T09:33:19.295078Z", + "total_journey_time_ms": 121, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:19.1736961Z", + "start_time": "2025-12-23T09:33:19.173723801Z", + 
"end_time": "2025-12-23T09:33:19.173737502Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:19.173939Z", + "start_time": "2025-12-23T09:33:19.174057Z", + "end_time": "2025-12-23T09:33:19.295078Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 121 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:19.173672899Z", + "start_time": "2025-12-23T09:33:19.173702Z", + "end_time": "2025-12-23T09:33:19.173718501Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:19.173629497Z", + "start_time": "2025-12-23T09:33:19.1737023Z", + "end_time": "2025-12-23T09:33:19.173711001Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 190, + "min_processing_ms": 69, + "max_processing_ms": 121, + "avg_processing_ms": 95, + "median_processing_ms": 121, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2757, + 
"slowest_section_id": 1, + "slowest_section_time_ms": 121 + } +} diff --git a/data/output/00258c300ca824ab90db12db58480b00040a96d1.json b/data/output/00258c300ca824ab90db12db58480b00040a96d1.json new file mode 100644 index 0000000..3600843 --- /dev/null +++ b/data/output/00258c300ca824ab90db12db58480b00040a96d1.json @@ -0,0 +1,230 @@ +{ + "file_name": "00258c300ca824ab90db12db58480b00040a96d1.txt", + "total_words": 386, + "top_n_words": [ + { + "word": "the", + "count": 23 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "on", + "count": 6 + }, + { + "word": "that", + "count": 6 + }, + { + "word": "government", + "count": 5 + }, + { + "word": "hong", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "20 years on: Tiananmen remembered .", + "length": 35 + }, + { + "text": "CNN's Aliza Kassim contributed to this report.", + "length": 46 + }, + { + "text": "Hong Kong police called it a \"peaceful gathering.", + "length": 49 + }, + { + "text": "When the firing stopped, hundreds if not thousands of people lay maimed or dead.", + "length": 80 + }, + { + "text": "Saturday's protest is an annual event organized by the Hong Kong Alliance, a pro-democracy group.", + "length": 97 + }, + { + "text": "\" Images of the demonstration showed a sea of flickering candles covering the length of the park.", + "length": 97 + }, + { + "text": "On June 4, 1989, Chinese troops in armored personnel carriers and tanks rumbled toward Tiananmen Square.", + "length": 104 + }, + { + "text": "A little more than 22 years ago, students gathered in Tiananmen Square to memorialize the recently deceased Hu Yaobang.", + "length": 119 + }, + { + "text": "Along the way, they met fierce resistance from students and city residents who barricaded the streets, so they fired at them.", + "length": 125 + }, + { + "text": "The soldiers, on 
strict orders to clear the square of demonstrators, had forced their way through the city's main thoroughfare.", + "length": 127 + }, + { + "text": "The candlelight vigil comes after recent efforts by the Chinese government to quash would-be demonstrators from holding anti-government protests.", + "length": 145 + }, + { + "text": "He was fired as Communist Party chief in 1987 by Deng Xiaoping for pushing policies deemed too soft toward \"bourgeois-liberal ideas\" and tolerating student protests.", + "length": 165 + }, + { + "text": "The April 15 memorial quickly turned into a pro-democracy movement, and students held talks with the government and later a hunger strike in Tiananmen Square to press their cause.", + "length": 179 + }, + { + "text": "Relatives of victims renew their hopes every year that Beijing's leaders will reverse the verdict that the protests were a counter-revolutionary rebellion that had to be put down.", + "length": 179 + }, + { + "text": "Hong Kong (CNN) -- Thousands of people filled Hong Kong's Victoria Park on Saturday to mark the 22nd anniversary of the bloody crackdown on pro-democracy protesters in Tiananmen Square.", + "length": 185 + }, + { + "text": "The government also tightened rules on foreign reporters, explicitly warning them that they risk detention, suspension of press cards and expulsion if they show up at planned demonstrations.", + "length": 190 + }, + { + "text": "In response to the campaign, authorities deployed heavy security along major thoroughfares, especially in Wangfujing, a busy shopping street in downtown Beijing that had been designated by the online group for protests.", + "length": 219 + }, + { + "text": "About 26 people were arrested between February and March, according to a Hong Kong-based human rights group, when an anonymous group began an internet campaign calling for anti-government protests in China similar to ones that have taken hold in the Middle East.", + "length": 262 + } + ], + "sentiment": { + 
"sentiment": "negative", + "score": 0.7606622576713562 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:19.674135921Z", + "first_section_created": "2025-12-23T09:33:19.674462334Z", + "last_section_published": "2025-12-23T09:33:19.674628141Z", + "all_results_received": "2025-12-23T09:33:19.735049031Z", + "output_generated": "2025-12-23T09:33:19.735213338Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:19.674462334Z", + "publish_time": "2025-12-23T09:33:19.674628141Z", + "first_worker_start": "2025-12-23T09:33:19.675193164Z", + "last_worker_end": "2025-12-23T09:33:19.734115Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:19.675137562Z", + "start_time": "2025-12-23T09:33:19.675208065Z", + "end_time": "2025-12-23T09:33:19.675250367Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:19.675466Z", + "start_time": "2025-12-23T09:33:19.675585Z", + "end_time": "2025-12-23T09:33:19.734115Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:19.675136562Z", + "start_time": "2025-12-23T09:33:19.675193164Z", + "end_time": "2025-12-23T09:33:19.675261467Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:19.675144962Z", + "start_time": "2025-12-23T09:33:19.675214865Z", + "end_time": "2025-12-23T09:33:19.675241966Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2419, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git a/data/output/0026c5543841760639093a4d21fdf0ad123efd07.json b/data/output/0026c5543841760639093a4d21fdf0ad123efd07.json new file mode 100644 index 0000000..1a9dd19 --- /dev/null +++ b/data/output/0026c5543841760639093a4d21fdf0ad123efd07.json @@ -0,0 +1,294 @@ +{ + "file_name": "0026c5543841760639093a4d21fdf0ad123efd07.txt", + "total_words": 675, + "top_n_words": [ + { + "word": "the", + "count": 33 + }, + { + "word": "of", + "count": 21 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "to", + "count": 18 + }, + { + "word": "was", + "count": 14 + }, + { + "word": "dogs", + "count": 12 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "for", + "count": 10 + }, + { + "word": "their", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "77.", + "length": 3 + }, + { + "text": "Dave is circled .", + "length": 17 + }, + { + "text": "'It is 
disgusting.", + "length": 18 + }, + { + "text": "What a difference!", + "length": 18 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "'That amount of money is amazing.", + "length": 33 + }, + { + "text": "I have never seen a dog in this condition before.", + "length": 49 + }, + { + "text": "We have had masses of people wanting to re-home them.", + "length": 53 + }, + { + "text": "RSPCA inspector Jan Edwards said: 'There's a lot of love for these dogs.", + "length": 72 + }, + { + "text": "We are very very grateful for people that have given their hard earned cash.", + "length": 76 + }, + { + "text": "The total mass of hair weighed a staggering 12kg, filling three plastic bags.", + "length": 77 + }, + { + "text": "One couple, Roderick and Hazel Ross, from Warsash, Hants, pledged £2,000 to the cause.", + "length": 87 + }, + { + "text": "Happy again: The energetic white puppy was pictured today with RSPCA inspector Jan Edwards .", + "length": 93 + }, + { + "text": "' Speaking about the Ross' donation and Hazel, she said: 'Their donation was above and beyond.", + "length": 94 + }, + { + "text": "And donations from as far as Canada came in after their heartbreaking story reached the country.", + "length": 96 + }, + { + "text": "Saved: One of the dogs is pictured after being painstakingly shaved of its matted fur last month .", + "length": 98 + }, + { + "text": "Bags of fur: The matted and filthy hair which was shaved from the dogs was enough to fill three large sacks .", + "length": 109 + }, + { + "text": "'I want to go and visit them, show them pictures of the dogs, talk about my experiences with them, and thank them.", + "length": 114 + }, + { + "text": "'It's going to cost around that much to keep the dogs because they will all not be well enough for re-homing for some time.", + "length": 123 + }, + { + "text": "Donations for the dogs have flooded in from members of the public hoping to give the animals a healthy future - raising 
£4,298.", + "length": 128 + }, + { + "text": "Appalling treatment: This harrowing picture was released by the RSPCA two weeks ago and shows two poodle cross puppies resembling bundles of rags.", + "length": 146 + }, + { + "text": "These heartwarming pictures capture the incredible transformation of a poodle cross puppy who just two weeks ago was barely recognisable as a dog.", + "length": 147 + }, + { + "text": "All nine dogs are still being given medical treatment and will not be able to go to new homes until they are healthy and also micro-chipped and neutered.", + "length": 153 + }, + { + "text": "The one-year-old animal, called Dave, was one of nine of the breed found in a shocking state - caked in mud while blinded by their horrendously matted fur.", + "length": 155 + }, + { + "text": "' All nine dogs are still being given medical treatment and will not be able to go to new homes until they are healthy and also micro-chipped and neutered .", + "length": 156 + }, + { + "text": "Three different RSPCA centres are now taking care of the canines - Mount Noddy, Chichester, West Sussex and Ashley Heath and The Ark, both in Stubbington, Hants.", + "length": 161 + }, + { + "text": "It took rescuers around three hours to shave each dog, and so terrified were the animals of being touched that vets fear they may have been beaten by their owners.", + "length": 163 + }, + { + "text": "This heartwarming image captures the incredible transformation of Dave the poodle cross puppy, who just two weeks ago was severely ill anf blinded by its matted fur .", + "length": 166 + }, + { + "text": "Dave was pictured bounding around a garden today with astounding energy considering he was unable to walk two weeks ago, while also suffering from severe illness and depression.", + "length": 177 + }, + { + "text": "Bounding around: It took rescuers around three hours to shave each dog, and so terrified were the animals of being touched that vets fear they may have been beaten by their owners 
.", + "length": 181 + }, + { + "text": "’ An animal welfare expert said it was likely the nine dogs had been kept in ‘unlivable’ conditions for their whole lives before being ditched ‘with no concern for their welfare’.", + "length": 189 + }, + { + "text": "New lease of life: The one-year-old, one of nine dogs abandoned on the side of the road in Winchester, was unable to stand just two weeks ago but was seen running around in a garden today .", + "length": 190 + }, + { + "text": "After the dogs were found, vet Martha Edwards said the puppies' mother Sian had to be sedated while her fur was cut off, adding: ‘She had stones trapped between her paw pads which had obviously been there for some time.", + "length": 221 + }, + { + "text": "After being found on the side of the road in Winchester, the dogs were painstakingly shaved and nursed back to healthy by the RSPCA - with their touching story prompting more than £4,000 in donations to pay for their care.", + "length": 223 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5572991371154785 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:20.174883454Z", + "first_section_created": "2025-12-23T09:33:20.17527327Z", + "last_section_published": "2025-12-23T09:33:20.175646886Z", + "all_results_received": "2025-12-23T09:33:20.245367659Z", + "output_generated": "2025-12-23T09:33:20.245544166Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:20.17527327Z", + "publish_time": "2025-12-23T09:33:20.175646886Z", + "first_worker_start": "2025-12-23T09:33:20.176015701Z", + "last_worker_end": "2025-12-23T09:33:20.244405Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:20.175973999Z", + "start_time": 
"2025-12-23T09:33:20.176048202Z", + "end_time": "2025-12-23T09:33:20.176162807Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:20.176482Z", + "start_time": "2025-12-23T09:33:20.17669Z", + "end_time": "2025-12-23T09:33:20.244405Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:20.176027301Z", + "start_time": "2025-12-23T09:33:20.176098704Z", + "end_time": "2025-12-23T09:33:20.176182008Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:20.175949598Z", + "start_time": "2025-12-23T09:33:20.176015701Z", + "end_time": "2025-12-23T09:33:20.176046902Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + 
"average_section_size": 3840, + "slowest_section_id": 0, + "slowest_section_time_ms": 69 + } +} diff --git a/data/output/002756fbd876d0da1c7d6e6d464c25d5316de3e8.json b/data/output/002756fbd876d0da1c7d6e6d464c25d5316de3e8.json new file mode 100644 index 0000000..0945161 --- /dev/null +++ b/data/output/002756fbd876d0da1c7d6e6d464c25d5316de3e8.json @@ -0,0 +1,182 @@ +{ + "file_name": "002756fbd876d0da1c7d6e6d464c25d5316de3e8.txt", + "total_words": 89, + "top_n_words": [ + { + "word": "to", + "count": 9 + }, + { + "word": "the", + "count": 4 + }, + { + "word": "s", + "count": 3 + }, + { + "word": "america", + "count": 2 + }, + { + "word": "back", + "count": 2 + }, + { + "word": "his", + "count": 2 + }, + { + "word": "is", + "count": 2 + }, + { + "word": "of", + "count": 2 + }, + { + "word": "with", + "count": 2 + }, + { + "word": "a", + "count": 1 + } + ], + "sorted_sentences": [ + { + "text": ".", + "length": 1 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "So who's with them?", + "length": 19 + }, + { + "text": "(CNN) -- As Barack Obama makes his case to the nation for taking the fight to ISIS, his top diplomat is also trying to make sure America doesn't have to go it alone.", + "length": 165 + }, + { + "text": "Secretary of State John Kerry is sweeping through the Middle East to try to convince regional leaders to back America's plan to beat back the terror group, which has seized a large chunk of territory stretching from northern Syria to central Iraq with alarming pace in recent months.", + "length": 283 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8298103213310242 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:20.676488523Z", + "first_section_created": "2025-12-23T09:33:20.677796577Z", + "last_section_published": "2025-12-23T09:33:20.677925582Z", + "all_results_received": "2025-12-23T09:33:20.738281669Z", + "output_generated": 
"2025-12-23T09:33:20.738405974Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:20.677796577Z", + "publish_time": "2025-12-23T09:33:20.677925582Z", + "first_worker_start": "2025-12-23T09:33:20.678436903Z", + "last_worker_end": "2025-12-23T09:33:20.737324Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:20.678495906Z", + "start_time": "2025-12-23T09:33:20.678551408Z", + "end_time": "2025-12-23T09:33:20.67859001Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:20.678714Z", + "start_time": "2025-12-23T09:33:20.678861Z", + "end_time": "2025-12-23T09:33:20.737324Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:20.678391502Z", + "start_time": "2025-12-23T09:33:20.678436903Z", + "end_time": "2025-12-23T09:33:20.678458604Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:20.678402302Z", + "start_time": "2025-12-23T09:33:20.678440704Z", + "end_time": "2025-12-23T09:33:20.678447004Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 
58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 476, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git a/data/output/00276e431130922bf0afc72a96e7c64d53b53c89.json b/data/output/00276e431130922bf0afc72a96e7c64d53b53c89.json new file mode 100644 index 0000000..cbaa01c --- /dev/null +++ b/data/output/00276e431130922bf0afc72a96e7c64d53b53c89.json @@ -0,0 +1,510 @@ +{ + "file_name": "00276e431130922bf0afc72a96e7c64d53b53c89.txt", + "total_words": 808, + "top_n_words": [ + { + "word": "the", + "count": 37 + }, + { + "word": "to", + "count": 23 + }, + { + "word": "a", + "count": 21 + }, + { + "word": "and", + "count": 21 + }, + { + "word": "of", + "count": 21 + }, + { + "word": "zara", + "count": 20 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "her", + "count": 15 + }, + { + "word": "s", + "count": 14 + }, + { + "word": "as", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": ": .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "Family.", + "length": 7 + }, + { + "text": "Asked .", + "length": 7 + }, + { + "text": "sudden.", + "length": 7 + }, + { + "text": "Former .", + "length": 8 + }, + { + "text": "’ But .", + "length": 9 + }, + { + "text": "Phillips.", + 
"length": 9 + }, + { + "text": "Like her .", + "length": 10 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "’ Former .", + "length": 12 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "We made that!", + "length": 13 + }, + { + "text": "of the birth.", + "length": 13 + }, + { + "text": "got back home.", + "length": 14 + }, + { + "text": "Zara is very .", + "length": 14 + }, + { + "text": "from hospital.", + "length": 14 + }, + { + "text": "‘awesome’.", + "length": 14 + }, + { + "text": "2012 by Peter.", + "length": 14 + }, + { + "text": "Intriguingly, .", + "length": 15 + }, + { + "text": "It was all very .", + "length": 17 + }, + { + "text": "‘I think Zara .", + "length": 17 + }, + { + "text": "The accompanying .", + "length": 18 + }, + { + "text": "World Championships.", + "length": 20 + }, + { + "text": "She earns a living .", + "length": 20 + }, + { + "text": "Last night a source .", + "length": 21 + }, + { + "text": "They said: ‘It [the .", + "length": 23 + }, + { + "text": "were approached by Hello!", + "length": 25 + }, + { + "text": "and agreed to sign a deal.", + "length": 26 + }, + { + "text": "08:34 EST, 24 February 2014 .", + "length": 29 + }, + { + "text": "21:05 EST, 24 February 2014 .", + "length": 29 + }, + { + "text": "’ Zara also says there is a .", + "length": 31 + }, + { + "text": "sponsorship and client management.", + "length": 34 + }, + { + "text": "Zara’s willingness to strike a .", + "length": 34 + }, + { + "text": "Rebecca English Royal Correspondent .", + "length": 37 + }, + { + "text": "magazine, as well as its front cover.", + "length": 37 + }, + { + "text": "Its small London office was set up in .", + "length": 39 + }, + { + "text": "Zara] is itching to be out there again.", + "length": 39 + }, + { + "text": "Zara and Mike appear in this week's Hello!", + "length": 42 + }, + { + "text": "Mia and, of course, provide for her future.", + "length": 43 + }, + { + "text": "deals with firms including 
Land Rover and Rolex.", + "length": 48 + }, + { + "text": "Cover stars: Zara features on the cover of Hello!", + "length": 49 + }, + { + "text": "brother, Zara has no Royal title or taxpayer funding.", + "length": 53 + }, + { + "text": "Shetland pony lined up for her daughter to learn to ride.", + "length": 57 + }, + { + "text": "magazine with a placid looking baby Mia lying between them .", + "length": 60 + }, + { + "text": "The couple also reveal they aimed to conceive in time for a .", + "length": 61 + }, + { + "text": "baby was born, Mike says: ‘I sent a text to all the family.", + "length": 61 + }, + { + "text": "interview was brokered by SEL, an Australian firm dealing in .", + "length": 62 + }, + { + "text": "lawyer Niri Shan, head of intellectual property and media and .", + "length": 63 + }, + { + "text": "close to Zara insisted she and Mike had agreed to the deal as .", + "length": 63 + }, + { + "text": "magazine deal] was not, initially, something they intended to do.", + "length": 65 + }, + { + "text": "his wedding pictures to the same magazine for a reported £500,000.", + "length": 67 + }, + { + "text": "and happy so far’ while Mike, 35, describes becoming a father as .", + "length": 68 + }, + { + "text": "close to her grandmother, so of course she made a phone call once we .", + "length": 70 + }, + { + "text": "as a professional equestrian, funding her stables through sponsorship .", + "length": 71 + }, + { + "text": "‘international sporting figures’ rather than members of the Royal .", + "length": 71 + }, + { + "text": "days after their publicist denied they were planning to make money out .", + "length": 72 + }, + { + "text": "understands there will be a level of criticism directed at her but she .", + "length": 72 + }, + { + "text": "England rugby captain Mike says: ‘I can see that Z [his nickname for .", + "length": 72 + }, + { + "text": "whether Palace protocol obliged them to inform the Queen as soon as the .", + "length": 73 + }, + { + "text": 
"Princess Anne’s Gatcombe Park estate in Gloucestershire and come just .", + "length": 73 + }, + { + "text": "the Mail has learnt the deal was brokered late last week by the Olympic .", + "length": 73 + }, + { + "text": "clearly felt this was the best way to approach the level of interest in .", + "length": 73 + }, + { + "text": "interview sees Zara and Mike speak about Mia’s birth on January 17 and .", + "length": 74 + }, + { + "text": "even reveal how Zara called her grandmother as soon as she returned home .", + "length": 74 + }, + { + "text": "pictures had already been taken by an old schoolfriend of Zara when they .", + "length": 74 + }, + { + "text": "world champion three-day eventer Zara, 32, says Mia is ‘pretty relaxed .", + "length": 74 + }, + { + "text": "December or January birth as it will allow Zara to compete in August’s .", + "length": 74 + }, + { + "text": "medal-winning equestrian’s management company, Sports and Entertainment .", + "length": 75 + }, + { + "text": "first official pictures of Mia were taken at Zara and Mike’s cottage on .", + "length": 75 + }, + { + "text": "entertainment at Taylor Wessing, warned: ‘This is going to make it very .", + "length": 75 + }, + { + "text": "Limited, whose managing director is none other than Zara’s brother Peter .", + "length": 76 + }, + { + "text": "will bring back uncomfortable memories of her brother’s decision to sell .", + "length": 76 + }, + { + "text": "commercial deal over her daughter – who is 16th in line to the throne – .", + "length": 77 + }, + { + "text": "Checking in: Phillips takes a peek at baby Mia at the racecourse in Wiltshire .", + "length": 79 + }, + { + "text": "’ This is not the first time Zara or her family have struck a deal with Hello!", + "length": 80 + }, + { + "text": "New mum: Phillips and baby Mia were spotted enjoying the sunshine at the Barbury racecourse last week .", + "length": 103 + }, + { + "text": "\"' Mia's second cousin Prince George was born at the private Lindo 
Wing of St Mary's Hospital in Paddington, London .", + "length": 117 + }, + { + "text": "difficult for them to protect the privacy of their baby when she is photographed in the future, outside of any specific issue of harassment.", + "length": 140 + }, + { + "text": "The Queen’s grand-daughter, her husband Mike Tindall and their five-week-old daughter Mia Grace are today plastered over 13 pages of Hello!", + "length": 141 + }, + { + "text": "Zara described her husband, here with her at Prince George's christening, as a 'hands-on' father, while he gushed: 'I still keeping looking at Mia and thinking \"Oh!", + "length": 164 + }, + { + "text": "Zara Phillips has become the first senior member of the Royal Family to sell photographs of their new baby to a celebrity magazine – for a reported fee of £150,000.", + "length": 167 + }, + { + "text": "Quiet time: Phillips and Tindall, seen here leaving the Christmas Day service at Sandringham less than a month before she gave birth, have kept a low profile since the birth .", + "length": 175 + }, + { + "text": "The Mail understands senior officials at Buckingham Palace were not made aware of the lucrative deal in advance and, tellingly, a spokesman for the Queen refused to comment publicly yesterday.", + "length": 192 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.582904577255249 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:21.178413305Z", + "first_section_created": "2025-12-23T09:33:21.179058032Z", + "last_section_published": "2025-12-23T09:33:21.179385245Z", + "all_results_received": "2025-12-23T09:33:21.247847266Z", + "output_generated": "2025-12-23T09:33:21.248020273Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:21.179058032Z", + "publish_time": "2025-12-23T09:33:21.179385245Z", + 
"first_worker_start": "2025-12-23T09:33:21.179795262Z", + "last_worker_end": "2025-12-23T09:33:21.24691Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:21.179720659Z", + "start_time": "2025-12-23T09:33:21.179795262Z", + "end_time": "2025-12-23T09:33:21.179887766Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:21.179928Z", + "start_time": "2025-12-23T09:33:21.180087Z", + "end_time": "2025-12-23T09:33:21.24691Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:21.179785362Z", + "start_time": "2025-12-23T09:33:21.179878565Z", + "end_time": "2025-12-23T09:33:21.180001971Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:21.179693058Z", + "start_time": "2025-12-23T09:33:21.179797162Z", + "end_time": "2025-12-23T09:33:21.179855164Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + 
"wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4752, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/00279d6affe18816c5a0bc9578f9c91d90e7c812.json b/data/output/00279d6affe18816c5a0bc9578f9c91d90e7c812.json new file mode 100644 index 0000000..b6d9caa --- /dev/null +++ b/data/output/00279d6affe18816c5a0bc9578f9c91d90e7c812.json @@ -0,0 +1,350 @@ +{ + "file_name": "00279d6affe18816c5a0bc9578f9c91d90e7c812.txt", + "total_words": 533, + "top_n_words": [ + { + "word": "the", + "count": 40 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "fossil", + "count": 16 + }, + { + "word": "of", + "count": 13 + }, + { + "word": "on", + "count": 13 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "is", + "count": 10 + }, + { + "word": "was", + "count": 9 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "jurassic", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "Mr .", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "On .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "Many .", + "length": 6 + }, + { + "text": "The 1.", + "length": 6 + }, + { + "text": "beaches.", + "length": 8 + }, + { + "text": "Geologist .", + "length": 11 + }, + { + "text": "find is made.", + "length": 13 + }, + { + "text": "Use common sense.", + "length": 17 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "Mr Crossley told the BBC: .", + "length": 27 + }, + { + "text": "billionaire's swimming pool.", + "length": 28 + }, + { + "text": "said it was 'a beautiful find'.", + "length": 31 + }, + { + "text": "It took eight hours to remove .", + "length": 31 + }, + { + "text": "illicit trade 
in fossil poaching.", + "length": 33 + }, + { + "text": "' Fossil-hunting is big business.", + "length": 33 + }, + { + "text": "But scientists are increasingly concerned .", + "length": 43 + }, + { + "text": "dolphin-like sea creature - near Lyme Regis.", + "length": 44 + }, + { + "text": "hunting at the moment, we've seen more people than normal on the .", + "length": 66 + }, + { + "text": "laboratories or museums but on the walls of business HQs, Russian .", + "length": 67 + }, + { + "text": "'The word is already out that Lyme Regis is the capital of fossil .", + "length": 67 + }, + { + "text": "the Jurassic Coast, a fossil-collecting code allows the removal of .", + "length": 68 + }, + { + "text": "that the lucrative market in dinosaur remains is driving a booming .", + "length": 68 + }, + { + "text": "Edmonds spotted a small part of the perfectly-preserved jaw of the .", + "length": 68 + }, + { + "text": "experts claim that these irreplaceable ancient treasures, which are .", + "length": 69 + }, + { + "text": "oligarchs' mansions and even, in one case, the bottom of a Japanese .", + "length": 69 + }, + { + "text": "vital for our understanding of Earth's history, are ending up not in .", + "length": 70 + }, + { + "text": "fossils provided the cliff-face is not damaged, and the code requires .", + "length": 71 + }, + { + "text": "and professional fossil hunter Paul Crossley, who helped excavate it, .", + "length": 71 + }, + { + "text": "rockhounds to notify the authorities only if a particularly important .", + "length": 71 + }, + { + "text": "extinct reptile sticking out of the mud - and said he almost ignored it.", + "length": 72 + }, + { + "text": "giant marine reptile fossil was painstakingly removed over eight hours .", + "length": 72 + }, + { + "text": "Richard Edmonds stumbled across the 18-inch fossil of an ichthyosaur - a .", + "length": 74 + }, + { + "text": "Fossils - the remains of dead organisms preserved in rocks - can fetch millions.", + "length": 
80 + }, + { + "text": "In April the remains of a 195-million-year-old reptile were discovered on the Jurassic Coast.", + "length": 93 + }, + { + "text": "But on closer inspection the 50-year-old fossil hunter realised it was a full ichthyosaur jaw.", + "length": 94 + }, + { + "text": "Find: The skeleton - which is about 5ft long - can be seen at the base of Black Ven ner Charmouth .", + "length": 99 + }, + { + "text": "Fossil experts work to carefully remove the skeleton, pictured close-up right, on the Jurassic Coast.", + "length": 101 + }, + { + "text": "People come from all over the UK for a days fossil hunting on the beach especially on a sunny weekend .", + "length": 103 + }, + { + "text": "5m (5ft) fossil was uncovered due to heavy storms and was found at the base of Black Ven near Charmouth.", + "length": 104 + }, + { + "text": "'We always advise going when the tide is falling and always stay well away from the cliffs and mudslides.", + "length": 105 + }, + { + "text": "A fossil expert works to uncover the rare skeleton - which is worth £15,000 - discovered by hobby collector Alan Saxon .", + "length": 121 + }, + { + "text": "The Jurassic Cliffs at Charmouth in Dorset have never been more busy since a fossil worth £15,000 was found on the beach on Boxing Day .", + "length": 137 + }, + { + "text": "Scientists are increasingly concerned that the lucrative market in dinosaur remains is driving a booming illicit trade in fossil poaching .", + "length": 139 + }, + { + "text": "The Jurassic Coast in Dorset has been packed with treasure hunters since a giant marine reptile fossil worth £15,000 was found on a beach on Boxing Day.", + "length": 153 + }, + { + "text": "The near-complete ichthyosaur skeleton was discovered by hobby collector Alan Saxon, from Chippenham in Wiltshire, who was on a post-Christmas visit to the Jurassic Coast.", + "length": 171 + }, + { + "text": "People come from all over the country for a day's fossil hunting along Britain's Jurassic 
Coast - a 96-mile-long world heritage site, which gets its name from its cliffs which were formed 180 million years ago.", + "length": 210 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4511474072933197 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:21.680263984Z", + "first_section_created": "2025-12-23T09:33:21.681937853Z", + "last_section_published": "2025-12-23T09:33:21.682137561Z", + "all_results_received": "2025-12-23T09:33:21.749773348Z", + "output_generated": "2025-12-23T09:33:21.749911354Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:21.681937853Z", + "publish_time": "2025-12-23T09:33:21.682137561Z", + "first_worker_start": "2025-12-23T09:33:21.682578779Z", + "last_worker_end": "2025-12-23T09:33:21.747334Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:21.68260508Z", + "start_time": "2025-12-23T09:33:21.682670583Z", + "end_time": "2025-12-23T09:33:21.682736886Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:21.682886Z", + "start_time": "2025-12-23T09:33:21.683017Z", + "end_time": "2025-12-23T09:33:21.747334Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:21.682618381Z", + "start_time": "2025-12-23T09:33:21.682686984Z", + "end_time": "2025-12-23T09:33:21.682769787Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:21.682505676Z", + "start_time": "2025-12-23T09:33:21.682578779Z", + "end_time": "2025-12-23T09:33:21.682611681Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3167, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/0027d089bda2e8aa403aae875f7084bf35e77998.json b/data/output/0027d089bda2e8aa403aae875f7084bf35e77998.json new file mode 100644 index 0000000..43c4690 --- /dev/null +++ b/data/output/0027d089bda2e8aa403aae875f7084bf35e77998.json @@ -0,0 +1,286 @@ +{ + "file_name": "0027d089bda2e8aa403aae875f7084bf35e77998.txt", + "total_words": 553, + "top_n_words": [ + { + "word": "to", + "count": 25 + }, + { + "word": "the", + "count": 23 + }, + { + "word": "i", + "count": 16 + }, + { + "word": "was", + "count": 15 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "lisa", + "count": 13 + }, + { + "word": "pole", + "count": 13 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "of", + 
"count": 12 + } + ], + "sorted_sentences": [ + { + "text": "thanks to pole dancing.", + "length": 23 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "I'm really proud of her.", + "length": 24 + }, + { + "text": "I was depressed and in pain.", + "length": 28 + }, + { + "text": "'I had lost my life as it was.", + "length": 30 + }, + { + "text": "I thought I had to accept my limitations.", + "length": 41 + }, + { + "text": "The hardest thing is to take the first step.", + "length": 44 + }, + { + "text": "I was told I would never be as I was before my accident.", + "length": 56 + }, + { + "text": "Everyone was telling me what I couldn't do, what I no-longer was.", + "length": 65 + }, + { + "text": "However, I felt it was time to accept that this was me, and rebuild myself.", + "length": 75 + }, + { + "text": "She does move after move and she's an inspiration to other girls in the class.", + "length": 78 + }, + { + "text": "A mother who lost her leg in an horrific car crash has found a new lease of life...", + "length": 83 + }, + { + "text": "Lisa now takes part in competitions as Britain's only below-knee amputee pole dancer .", + "length": 86 + }, + { + "text": "Lisa  had to have her right leg amputated below the knee following a car crash in 2007 .", + "length": 89 + }, + { + "text": "' Lisa no longer feels the same low body confidence she had directly after her accident .", + "length": 89 + }, + { + "text": "The challenges of pole dancing meant there were days when Lisa struggled to walk afterwards .", + "length": 93 + }, + { + "text": "Lisa Eagleton, 40, had to have her right leg amputated below the knee following the smash in 2007.", + "length": 98 + }, + { + "text": "The socket in my prosthetic leg rubbed, causing blisters that resulted in me being barely able to walk.", + "length": 103 + }, + { + "text": "' Lisa was referred to the Pace Rehabilitation Centre in Cheadle, where the support helped her walk again.", + "length": 106 + 
}, + { + "text": "Lisa shows off some of her incredible skills on the pole, which took a long time and determination to master .", + "length": 110 + }, + { + "text": "Now she is also wowing crowds and takes part in competitions as Britain's only below-knee amputee pole dancer.", + "length": 110 + }, + { + "text": "The mother-of-four from Wigan - who is also known as Lady Lush Lisa - went on to become an alternative catwalk model.", + "length": 117 + }, + { + "text": "'My teachers helped me learn the moves, some of which took a long time to master, but I was determined to dance again.", + "length": 118 + }, + { + "text": "But, after months of rehabilitation and support at a specialist centre, she regained her confidence and learned to walk again.", + "length": 126 + }, + { + "text": "Lisa, who had enjoyed dancing before the crash, said: 'Even months after my amputation, I was in pain every day, living on morphine.", + "length": 132 + }, + { + "text": "The challenges of pole dancing meant there were days when Lisa struggled to walk afterwards, but her determined spirit saw her through.", + "length": 135 + }, + { + "text": "'I had missed dance so much, but decided a route to return to it was pole dancing, as that was low impact on my leg and offered me support.", + "length": 139 + }, + { + "text": "Lisa said: 'Since the accident, I felt unattractive, had low body confidence and was uncertain of what the world thought of a limbless woman.", + "length": 141 + }, + { + "text": "She said: 'After lots of practice, I eventually managed to compete in some competitions and became Britain's only below-knee amputee pole dancer.", + "length": 145 + }, + { + "text": "Out of the blue she was offered a chance to take part in a photoshoot, which led to her becoming an alternative model and taking up pole dancing.", + "length": 145 + }, + { + "text": "' Lisa, who swims eight hours a week and does yoga to help ease the pain she still suffers, said: 'There's always a way to do things 
to make you happy.", + "length": 151 + }, + { + "text": "' Victoria Bryne, from Pole Seduction, the Wigan pole dancing and fitness school where Lisa learned how to pole dance, said: 'The things she can do on the pole now are unbelievable.", + "length": 181 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4182020425796509 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:22.181886706Z", + "first_section_created": "2025-12-23T09:33:22.182201019Z", + "last_section_published": "2025-12-23T09:33:22.182422528Z", + "all_results_received": "2025-12-23T09:33:22.245645533Z", + "output_generated": "2025-12-23T09:33:22.245834341Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:22.182201019Z", + "publish_time": "2025-12-23T09:33:22.182422528Z", + "first_worker_start": "2025-12-23T09:33:22.183043154Z", + "last_worker_end": "2025-12-23T09:33:22.244737Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:22.183010752Z", + "start_time": "2025-12-23T09:33:22.183083755Z", + "end_time": "2025-12-23T09:33:22.183163659Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:22.183259Z", + "start_time": "2025-12-23T09:33:22.183415Z", + "end_time": "2025-12-23T09:33:22.244737Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:22.183011752Z", + "start_time": "2025-12-23T09:33:22.183076955Z", + "end_time": "2025-12-23T09:33:22.183136657Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:22.182980351Z", + 
"start_time": "2025-12-23T09:33:22.183043154Z", + "end_time": "2025-12-23T09:33:22.183085355Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3012, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/0028aa521d2b3363a2a2d7d74b2961c2c5a8f514.json b/data/output/0028aa521d2b3363a2a2d7d74b2961c2c5a8f514.json new file mode 100644 index 0000000..be8392f --- /dev/null +++ b/data/output/0028aa521d2b3363a2a2d7d74b2961c2c5a8f514.json @@ -0,0 +1,254 @@ +{ + "file_name": "0028aa521d2b3363a2a2d7d74b2961c2c5a8f514.txt", + "total_words": 293, + "top_n_words": [ + { + "word": "the", + "count": 19 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "in", + "count": 5 + }, + { + "word": "at", + "count": 4 + }, + { + "word": "camp", + 
"count": 4 + }, + { + "word": "is", + "count": 4 + }, + { + "word": "korean", + "count": 4 + }, + { + "word": "s", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "Gen.", + "length": 4 + }, + { + "text": "The U.", + "length": 6 + }, + { + "text": "\" A joint U.", + "length": 12 + }, + { + "text": "The alarm was raised this month when a U.", + "length": 41 + }, + { + "text": "military base, the Defense Ministry said.", + "length": 41 + }, + { + "text": "Journalist Yoonjung Seo contributed to this report.", + "length": 51 + }, + { + "text": "John Johnson, who is heading the Camp Carroll Task Force.", + "length": 57 + }, + { + "text": "\"If we get evidence that there is a risk to health, we are going to fix it.", + "length": 75 + }, + { + "text": "The tests follow allegations of American soldiers burying chemicals on Korean soil.", + "length": 83 + }, + { + "text": "It has also been linked to birth defects, according to the Department of Veterans Affairs.", + "length": 90 + }, + { + "text": "- South Korean investigation is being conducted at Camp Carroll to test the validity of allegations.", + "length": 100 + }, + { + "text": "\"We've been working very closely with the Korean government since we had the initial claims,\" said Lt.", + "length": 102 + }, + { + "text": "Once testing is finished, the government will decide on how to test more than 80 other sites -- all former bases.", + "length": 113 + }, + { + "text": "(CNN) -- South Korea launched an investigation Tuesday into reports of toxic chemicals being dumped at a former U.", + "length": 114 + }, + { + "text": "veteran alleged barrels of the toxic herbicide Agent Orange were buried at an American base in South Korea in the late 1970s.", + "length": 125 + }, + { + "text": "military sprayed Agent Orange from planes onto jungles in Vietnam to kill 
vegetation in an effort to expose guerrilla fighters.", + "length": 127 + }, + { + "text": "Exposure to the chemical has been blamed for a wide variety of ailments, including certain forms of cancer and nerve disorders.", + "length": 127 + }, + { + "text": "Two of his fellow soldiers corroborated his story about Camp Carroll, about 185 miles (300 kilometers) southeast of the capital, Seoul.", + "length": 135 + }, + { + "text": "The first tests are being carried out by a joint military, government and civilian task force at the site of what was Camp Mercer, west of Seoul.", + "length": 145 + }, + { + "text": "\"Soil and underground water will be taken in the areas where toxic chemicals were allegedly buried,\" said the statement from the South Korean Defense Ministry.", + "length": 159 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6389083862304688 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:22.683209926Z", + "first_section_created": "2025-12-23T09:33:22.683677645Z", + "last_section_published": "2025-12-23T09:33:22.683885453Z", + "all_results_received": "2025-12-23T09:33:22.744722845Z", + "output_generated": "2025-12-23T09:33:22.74485705Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:22.683677645Z", + "publish_time": "2025-12-23T09:33:22.683885453Z", + "first_worker_start": "2025-12-23T09:33:22.684495777Z", + "last_worker_end": "2025-12-23T09:33:22.743797Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:22.684455875Z", + "start_time": "2025-12-23T09:33:22.684522178Z", + "end_time": "2025-12-23T09:33:22.68456248Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": 
"2025-12-23T09:33:22.684721Z", + "start_time": "2025-12-23T09:33:22.684878Z", + "end_time": "2025-12-23T09:33:22.743797Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:22.684425674Z", + "start_time": "2025-12-23T09:33:22.684497377Z", + "end_time": "2025-12-23T09:33:22.684542879Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:22.684425274Z", + "start_time": "2025-12-23T09:33:22.684495777Z", + "end_time": "2025-12-23T09:33:22.684517178Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1732, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/0028c789435d66119c8dc0c72ce2703adbf7ce2e.json 
b/data/output/0028c789435d66119c8dc0c72ce2703adbf7ce2e.json new file mode 100644 index 0000000..783a5d9 --- /dev/null +++ b/data/output/0028c789435d66119c8dc0c72ce2703adbf7ce2e.json @@ -0,0 +1,552 @@ +{ + "file_name": "0028c789435d66119c8dc0c72ce2703adbf7ce2e.txt", + "total_words": 1364, + "top_n_words": [ + { + "word": "the", + "count": 114 + }, + { + "word": "of", + "count": 43 + }, + { + "word": "a", + "count": 40 + }, + { + "word": "and", + "count": 34 + }, + { + "word": "in", + "count": 33 + }, + { + "word": "to", + "count": 26 + }, + { + "word": "s", + "count": 23 + }, + { + "word": "is", + "count": 20 + }, + { + "word": "was", + "count": 19 + }, + { + "word": "room", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "In .", + "length": 4 + }, + { + "text": "It .", + "length": 4 + }, + { + "text": "With .", + "length": 6 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "These were always busy.", + "length": 23 + }, + { + "text": "doubles as Downton Abbey.", + "length": 25 + }, + { + "text": "05:35 EST, 24 December 2013 .", + "length": 29 + }, + { + "text": "19:06 EST, 23 December 2013 .", + "length": 29 + }, + { + "text": "A valet had to do everything.", + "length": 29 + }, + { + "text": "Adjoining the marital bedroom.", + "length": 30 + }, + { + "text": "It is the nerve centre of the house.", + "length": 36 + }, + { + "text": "He would earn £60 a year (£6,000 today).", + "length": 42 + }, + { + "text": "Footmen and housemaids worked 18 hours a day.", + "length": 45 + }, + { + "text": "They cost £495 (£50,000 in today’s money).", + "length": 46 + }, + { + "text": "The Dowager Countess’s favourite sitting room.", + "length": 48 + }, + { + "text": "The plain white crockery is stacked on 
dressers.", + "length": 48 + }, + { + "text": "2 million on the land and 970,000 in the mines.", + "length": 49 + }, + { + "text": "By 1920, cars  were a common sight on the drive.", + "length": 49 + }, + { + "text": "This was menial work, but there was no shame in it.", + "length": 51 + }, + { + "text": "Has the human touches that the austere pantry lacks.", + "length": 52 + }, + { + "text": "The footmen’s cubby hole, with its own set of bells.", + "length": 54 + }, + { + "text": "Buckets of hot water would be brought from the kitchens.", + "length": 56 + }, + { + "text": "Sometimes serves as the Earl of Grantham’s private den.", + "length": 57 + }, + { + "text": "meet to eat and talk, under the watchful eye of the butler.", + "length": 59 + }, + { + "text": "3 million people worked in domestic service, compared to 1.", + "length": 61 + }, + { + "text": "In 1912, the year in which Downton’s first series was set, 1.", + "length": 63 + }, + { + "text": "has high windows that let in cascades of light — one of  the .", + "length": 65 + }, + { + "text": "That is why kitchens were located in the basement of great houses.", + "length": 66 + }, + { + "text": "carpets was to sprinkle tea leaves over the floor and sweep them up.", + "length": 68 + }, + { + "text": "The only room in the main house where the smell of food was welcomed.", + "length": 69 + }, + { + "text": "Sometimes doubles as his bedroom, for instance when his wife was ill.", + "length": 69 + }, + { + "text": "While the family is at dinner, the maids will lay out the nightclothes.", + "length": 71 + }, + { + "text": "distinctive features of the real stately home, Highclere Castle, which .", + "length": 72 + }, + { + "text": "its plain table and wooden chairs this is where all the domestic staff .", + "length": 72 + }, + { + "text": "an age before vacuum cleaners, one popular method for lifting dust from .", + "length": 73 + }, + { + "text": "The boot room was the scene of a violent attack on lady’s maid Anna 
Bates.", + "length": 76 + }, + { + "text": "In real life, a butler was the only servant allowed to answer the front door.", + "length": 77 + }, + { + "text": "The Earl’s valet Mr Bates helps him dress here and would even cut his hair.", + "length": 77 + }, + { + "text": "Servants could not go to  bed until the family retired, often around midnight.", + "length": 79 + }, + { + "text": "It was not unusual for stately homes such as Downton to have 30 guests at a time.", + "length": 81 + }, + { + "text": "In these two rooms at Highclere, there are 5,600 books dating back to Tudor times.", + "length": 82 + }, + { + "text": "The open-top is a Crossley RFC, as owned by King George V and the  Prince of Wales.", + "length": 84 + }, + { + "text": "Elsewhere, the family would have been scandalised to catch a whiff of cooking odours.", + "length": 85 + }, + { + "text": "A countess such as Cora would not bathe alone: her lady’s maid would be in attendance.", + "length": 88 + }, + { + "text": "In 1912, footmen would earn about £25 a year (£2,500 a year today), plus bed and board.", + "length": 89 + }, + { + "text": "Grandly furnished with polished walnut tables and lacquered cabinets displaying porcelain.", + "length": 90 + }, + { + "text": "Where all the family’s shoes and boots had to be shined every day, often 30 pairs or more.", + "length": 92 + }, + { + "text": "Where the butler met  guests, though never  guests of the servants — that  was forbidden.", + "length": 94 + }, + { + "text": "In the early part of the 20th century about 5 per cent of births resulted in the mother’s death.", + "length": 98 + }, + { + "text": "In the centre of this sunny room, there is a freestanding white bath — with no plumbing or taps.", + "length": 98 + }, + { + "text": "Finding a husband was tough after the carnage of World War I: two million women would stay spinsters.", + "length": 101 + }, + { + "text": "Where the middle Crawley daughter prepares for her wedding to Sir Anthony Strallan, only 
to be jilted.", + "length": 102 + }, + { + "text": "Maids took care to clean only when family rooms were vacant and any breakages were deducted from wages.", + "length": 103 + }, + { + "text": "her bathroom, adjoining the marital bedroom, with a free-standing bath - which has no plumbing or taps .", + "length": 104 + }, + { + "text": "Once this was done, a footman might pause for a quick cigarette, but female servants were forbidden to smoke.", + "length": 109 + }, + { + "text": "There is an armchair, a bedside table with a lamp, a washstand and a faded woollen rug on the worn floorboards.", + "length": 111 + }, + { + "text": "With its single window and iron bed, it is almost as bare as a prison cell and would have been ice-cold in winter.", + "length": 114 + }, + { + "text": "It is from a high-backed leather chair behind a desk that the butler dispenses wisdom and justice among his staff.", + "length": 114 + }, + { + "text": "The only piece of ornate furniture is the wall of bells, each one connected by wires to a different room upstairs.", + "length": 114 + }, + { + "text": "And it has seen plenty of it, especially the sudden death of Lady Mary’s Turkish lover, the diplomat Kemal Pamuk.", + "length": 115 + }, + { + "text": "It has a great iron range — like a stack of Aga ovens — with copper kettles bubbling away and twin stone sinks.", + "length": 115 + }, + { + "text": "With its red flock wallpaper, full-length oval mirror and four-poster bed, this room is a fine setting for melodrama.", + "length": 117 + }, + { + "text": "Here, the Downton housekeeper can relax in the plush  velvet chair, surrounded by photos of her family and nick-nacks.", + "length": 119 + }, + { + "text": "The bootroom (right) where the family's shoes and boots would be shined, was the scene of a violent attack on Anna Bates .", + "length": 122 + }, + { + "text": "The bedroom of Thomas the thieving footman, at the end of the servants' corridor, is typical of a servant's sleeping quarters .", 
+ "length": 127 + }, + { + "text": "Scene of perhaps the most heartwrenching episode when Lord Grantham’s youngest daughter dies following the birth of her baby.", + "length": 127 + }, + { + "text": "A copper pan would be filled with hot coals and, with its lid shut, slid between the sheets to make sure they are warm and dry.", + "length": 127 + }, + { + "text": "The bedroom of Thomas the thieving footman, at the end of the servants’ corridor, is typical of a servant’s sleeping quarters.", + "length": 130 + }, + { + "text": "This particular room was where the body of the diplomat Kemal Pamuk was taken after he had died in Lady Mary’s arms in her bedroom.", + "length": 133 + }, + { + "text": "For the first time it allows fans to visualise exactly where the  household’s many love affairs, feuds  and dramas are played out...", + "length": 136 + }, + { + "text": "This scene shot shows butler Carson with Lady Cora in Downton Abbey's drawing room, which the family retire into after meals to sip brandy .", + "length": 140 + }, + { + "text": "That was equally true for the aristocracy and the working-class, and the death rate did not start to fall until antibiotics arrived in the Fifties.", + "length": 147 + }, + { + "text": "This is the 3D floorplan of Downton Abbey as it will appear in tomorrow's Christmas special episode - complete with a tree and Matthew's gramophone .", + "length": 149 + }, + { + "text": "Overlooked by balustrades on the first floor, this has Ming vases, Chinese lacquer cabinets, a huge fireplace, and enough floor space to hold a ball.", + "length": 149 + }, + { + "text": "The dining room would have been the only room in the main house where the smell of food was welcomed - that is why the kitchen is located in the basement .", + "length": 155 + }, + { + "text": "Scene of confidential fireside chats, it is the room to which all the family retire after meals to sip brandy or continue conversations from the dining table.", + "length": 158 + }, + { + 
"text": "The room was usually left in disarray — a maid’s first job at 5am, before she could have breakfast, was to clear the glasses, tidy the cushions and rake out the fire.", + "length": 170 + }, + { + "text": "But in real life the bedroom would also be the scene of the most mundane tasks: a maid would tiptoe in at dawn to light the fire and, if necessary, remove the chamber pot.", + "length": 171 + }, + { + "text": "Mrs Patmore’s domain, though the recent arrival of an electric mixer makes the cook fear that households such as Downton will soon no longer need to employ the likes of her.", + "length": 175 + }, + { + "text": "Where the Earl of Grantham and his family open their presents on Christmas Day, and where they help themselves  to a luncheon buffet laid out under  the glass-fronted book cabinets.", + "length": 183 + }, + { + "text": "The tradition at Downton is that the  staff have their Christmas dinner at lunchtime in the servants’ hall, and the family are served with theirs in the dining room in the evening.", + "length": 183 + }, + { + "text": "A room this size reflected the splendour of the family’s lifestyle: at Highclere before World War I, this required a domestic staff that included 25 maids, 14 footmen and three chefs.", + "length": 185 + }, + { + "text": "On the eve of the Downton Abbey Christmas special, this gloriously detailed floorplan is based on Highclere Castle (the real-life model for Downton) and the fictional house as it appears on TV.", + "length": 193 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.49926815927028656 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:23.184970354Z", + "first_section_created": "2025-12-23T09:33:23.185277666Z", + "last_section_published": "2025-12-23T09:33:23.185765386Z", + "all_results_received": "2025-12-23T09:33:23.340481169Z", + "output_generated": "2025-12-23T09:33:23.340730478Z", + "total_processing_time_ms": 155, + 
"file_splitting_time_ms": 0, + "workers_processing_time_ms": 154, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:23.185277666Z", + "publish_time": "2025-12-23T09:33:23.185584378Z", + "first_worker_start": "2025-12-23T09:33:23.186060897Z", + "last_worker_end": "2025-12-23T09:33:23.339607Z", + "total_journey_time_ms": 154, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:23.186174802Z", + "start_time": "2025-12-23T09:33:23.18637491Z", + "end_time": "2025-12-23T09:33:23.186477514Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:23.186547Z", + "start_time": "2025-12-23T09:33:23.186671Z", + "end_time": "2025-12-23T09:33:23.339607Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 152 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:23.185997495Z", + "start_time": "2025-12-23T09:33:23.186060897Z", + "end_time": "2025-12-23T09:33:23.186167401Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:23.186256905Z", + "start_time": "2025-12-23T09:33:23.186320907Z", + "end_time": "2025-12-23T09:33:23.186368609Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:23.185660781Z", + "publish_time": "2025-12-23T09:33:23.185765386Z", + "first_worker_start": "2025-12-23T09:33:23.186345208Z", + "last_worker_end": "2025-12-23T09:33:23.273486Z", + "total_journey_time_ms": 87, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:23.186287006Z", + "start_time": "2025-12-23T09:33:23.186351609Z", + "end_time": "2025-12-23T09:33:23.186400311Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { 
+ "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:23.186507Z", + "start_time": "2025-12-23T09:33:23.186674Z", + "end_time": "2025-12-23T09:33:23.273486Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 86 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:23.186281806Z", + "start_time": "2025-12-23T09:33:23.186509515Z", + "end_time": "2025-12-23T09:33:23.186594318Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:23.186310207Z", + "start_time": "2025-12-23T09:33:23.186345208Z", + "end_time": "2025-12-23T09:33:23.18637661Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 238, + "min_processing_ms": 86, + "max_processing_ms": 152, + "avg_processing_ms": 119, + "median_processing_ms": 152, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3800, + "slowest_section_id": 0, + "slowest_section_time_ms": 154 + } +} diff --git 
a/data/output/0028e335e8d06f2bab786a15965621db28e2ba55.json b/data/output/0028e335e8d06f2bab786a15965621db28e2ba55.json new file mode 100644 index 0000000..e4d8d37 --- /dev/null +++ b/data/output/0028e335e8d06f2bab786a15965621db28e2ba55.json @@ -0,0 +1,464 @@ +{ + "file_name": "0028e335e8d06f2bab786a15965621db28e2ba55.txt", + "total_words": 966, + "top_n_words": [ + { + "word": "the", + "count": 59 + }, + { + "word": "to", + "count": 23 + }, + { + "word": "of", + "count": 21 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "for", + "count": 15 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "that", + "count": 12 + }, + { + "word": "who", + "count": 12 + }, + { + "word": "immigration", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "Steve King .", + "length": 12 + }, + { + "text": "Let's find out.", + "length": 15 + }, + { + "text": "without papers.", + "length": 15 + }, + { + "text": "\" Don't believe it.", + "length": 19 + }, + { + "text": "Of course they did.", + "length": 19 + }, + { + "text": "Who wouldn't have been?", + "length": 23 + }, + { + "text": "I bet they were afraid.", + "length": 23 + }, + { + "text": "But this story isn't over.", + "length": 26 + }, + { + "text": "Opinion: Let's deport Rep.", + "length": 26 + }, + { + "text": "Luis Gutierrez, D-Illinois.", + "length": 27 + }, + { + "text": "This is how Americans behave.", + "length": 29 + }, + { + "text": "The letter -- signed by Reps.", + "length": 29 + }, + { + "text": "Now, they have been set free.", + "length": 29 + }, + { + "text": "There's not room for nine more?", + "length": 31 + }, + { + "text": "All nine will get asylum hearings.", + "length": 34 + }, + { + "text": "These kids are Americans, all right.", + "length": 36 + }, + { + "text": "We're ornery, courageous and defiant.", + "length": 37 + }, + { + "text": "For others, it is major 
inconvenience.", + "length": 38 + }, + { + "text": "The nine asked for humanitarian parole.", + "length": 39 + }, + { + "text": "When that was denied, they claimed asylum.", + "length": 42 + }, + { + "text": "We yell \"freedom\" at the top of our lungs.", + "length": 42 + }, + { + "text": "Searching for the American Dream in Mexico .", + "length": 44 + }, + { + "text": "So far, Leopold has been spectacularly wrong.", + "length": 45 + }, + { + "text": "It's a major victory for the Dreamer movement.", + "length": 46 + }, + { + "text": "And the Dreamers got more warmth from Congress.", + "length": 47 + }, + { + "text": "I've had my differences with the Dreamer movement.", + "length": 50 + }, + { + "text": "All because they wouldn't take \"go\" for an answer.", + "length": 50 + }, + { + "text": "Opinion: House, knowledge economy needs immigrants .", + "length": 52 + }, + { + "text": "It's a story that the White House wishes would go away.", + "length": 55 + }, + { + "text": "For many, what happened to the Dream 9 was the last straw.", + "length": 58 + }, + { + "text": "Some might not be eligible for deferred action, some might be.", + "length": 62 + }, + { + "text": "The Dream 9 are back where they belong -- in the United States.", + "length": 63 + }, + { + "text": "The fault lines it exposed within the immigration reform movement remain.", + "length": 73 + }, + { + "text": "Didn't Obama say that the Dreamers were Americans except for legal status?", + "length": 74 + }, + { + "text": "Nearly 300,000 Dreamers have been awarded the special accommodation so far.", + "length": 75 + }, + { + "text": "I wouldn't presume to tell people who put their freedom at risk along the U.", + "length": 76 + }, + { + "text": "\" Immigration reformers were bound to lose confidence in this administration.", + "length": 77 + }, + { + "text": "-Mexico border that the difficult decisions they're making are the wrong ones.", + "length": 78 + }, + { + "text": "The next thing you know, a bunch 
of Dreamers will dump tea into Boston Harbor.", + "length": 78 + }, + { + "text": "The opinions expressed in this commentary are solely those of Ruben Navarrette.", + "length": 79 + }, + { + "text": "But I never questioned their sincerity, or dismissed anything they did as a stunt.", + "length": 82 + }, + { + "text": "The idea was to draw attention to the administration's repressive immigration policies.", + "length": 87 + }, + { + "text": "And I certainly wouldn't do it to protect an administration that doesn't deserve protecting.", + "length": 92 + }, + { + "text": "Then, on July 22, all nine linked arms and marched across the border into the United States.", + "length": 92 + }, + { + "text": "Martinez Valdez and Peniche-Vargas--had been put in solitary confinement for what was supposed to be 15 days.", + "length": 109 + }, + { + "text": "For many, this is a heartwarming story about the power of the individual who is brave enough to make a stand.", + "length": 109 + }, + { + "text": "I think that many of these young people have a sense of entitlement, like most young people in the United States.", + "length": 113 + }, + { + "text": "Thirty-five lawmakers signed a letter asking Obama to use his discretion to release the young people from custody.", + "length": 114 + }, + { + "text": "San Diego, California (CNN) -- The \"Dream 9,\" five women and four men, say that they are \"undocumented and unafraid.", + "length": 116 + }, + { + "text": "He also said it was unlikely that the three Dreamers who voluntarily left the United States would qualify for asylum.", + "length": 117 + }, + { + "text": "Pending those hearings, the nine have now been released into the waiting arms of family members within the United States.", + "length": 121 + }, + { + "text": "Martinez-Valdez and Peniche-Vargas were in solitary confinement because, according to authorities, they started a ruckus in the dining hall.", + "length": 140 + }, + { + "text": "Ditto for immigration reformers who are 
split between those who back the Dream 9 and those who want to sacrifice them for political expediency.", + "length": 143 + }, + { + "text": "Three of them who were on this side of the border -- Saavedra, Martinez Valdez and Mateo-Jimenez -- \"self deported\" to Mexico to join the other six.", + "length": 148 + }, + { + "text": "David Leopold, former president of the American Immigration Lawyers Association, callously dismissed the Dream 9 action as a \"publicity stunt\" and a distraction.", + "length": 161 + }, + { + "text": "I didn't think it was a good idea for Dreamer activists to disrupt congressional hearings on immigration reform, or occupy the offices of reform advocates like Rep.", + "length": 164 + }, + { + "text": "Besides, Obama made quite a show -- before the election -- of announcing that his administration was sparing Dreamers deportation by offering deferred action and temporary work permits.", + "length": 185 + }, + { + "text": "of what drives Dreamer activists within the United States is a look-at-me narcissism fed by social media and a culture where young people are told they can become the next American Idol.", + "length": 186 + }, + { + "text": "Earlier in the week, federal officials found that all nine have \"credible fear\" of persecution or torture in their birth countries and thus cannot be removed without a hearing before an immigration judge.", + "length": 204 + }, + { + "text": "Mike Honda, D-California, Raul Grijalva, D-Arizona, and Ruben Hinojosa, D-Texas, among others -- describes the activists as \"victims of our broken immigration policy\" who \"deserve to come home to the United States.", + "length": 214 + }, + { + "text": "Some of these \"Dreamers\" had been deported to Mexico by the Obama administration, though President Obama has repeatedly said that his administration is not looking to deport Dreamers--the name taken by young immigrants who are in the U.", + "length": 236 + }, + { + "text": "The nine -- Claudia Amaro, Adriana 
Gil Diaz, Luis Leon Lopez, Maria Peniche-Vargas, Ceferino Santiago, LuLu Martinez Valdez, Mario Felix-Garcia, Marco Saavedra and Lizbeth Mateo-Jimenez -- spent more than two weeks in a federal immigration detention facility in Eloy, AZ.", + "length": 271 + }, + { + "text": "A president can't deport nearly 2 million people in under five years, split up hundreds of thousands of families, detain thousands of undocumented children without giving them access to legal counsel, and expand Arizona-style immigration enforcement nationwide through the maniacal program known as \"Secure Communities\" without raising a stink.", + "length": 344 + }, + { + "text": "Maybe there would be more compassion on the Potomac if these young people hadn't embarrassed Obama, put the lie to the fairy tale that this administration has been compassionate toward immigrants, divided self-serving organizations such as the lawyers group AILA, and pitted one group of immigration reformers against another -- those who want to protect the Dreamers versus those who want to protect the president.", + "length": 415 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6585030108690262 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:23.686586677Z", + "first_section_created": "2025-12-23T09:33:23.68691859Z", + "last_section_published": "2025-12-23T09:33:23.68742851Z", + "all_results_received": "2025-12-23T09:33:23.790727271Z", + "output_generated": "2025-12-23T09:33:23.79095588Z", + "total_processing_time_ms": 104, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 103, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:23.68691859Z", + "publish_time": "2025-12-23T09:33:23.687278004Z", + "first_worker_start": "2025-12-23T09:33:23.687658619Z", + "last_worker_end": "2025-12-23T09:33:23.789798Z", + "total_journey_time_ms": 102, + "worker_timings": { + "sentencesort": { + 
"worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:23.687747322Z", + "start_time": "2025-12-23T09:33:23.687821125Z", + "end_time": "2025-12-23T09:33:23.687912429Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:23.688038Z", + "start_time": "2025-12-23T09:33:23.688187Z", + "end_time": "2025-12-23T09:33:23.789798Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 101 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:23.687716221Z", + "start_time": "2025-12-23T09:33:23.687833926Z", + "end_time": "2025-12-23T09:33:23.687964331Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:23.687593216Z", + "start_time": "2025-12-23T09:33:23.687658619Z", + "end_time": "2025-12-23T09:33:23.68769992Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:23.687359307Z", + "publish_time": "2025-12-23T09:33:23.68742851Z", + "first_worker_start": "2025-12-23T09:33:23.687756823Z", + "last_worker_end": "2025-12-23T09:33:23.760844Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:23.687888128Z", + "start_time": "2025-12-23T09:33:23.688037334Z", + "end_time": "2025-12-23T09:33:23.688058535Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:23.688038Z", + "start_time": "2025-12-23T09:33:23.688153Z", + "end_time": "2025-12-23T09:33:23.760844Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:23.687806125Z", + "start_time": "2025-12-23T09:33:23.687830726Z", + "end_time": "2025-12-23T09:33:23.687866627Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:23.687731422Z", + "start_time": "2025-12-23T09:33:23.687756823Z", + "end_time": "2025-12-23T09:33:23.687807625Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 173, + "min_processing_ms": 72, + "max_processing_ms": 101, + "avg_processing_ms": 86, + "median_processing_ms": 101, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2908, + "slowest_section_id": 0, + "slowest_section_time_ms": 102 + } +} diff --git a/data/output/00292757039c4e44293652db911f7c3f9025a713.json b/data/output/00292757039c4e44293652db911f7c3f9025a713.json new file mode 100644 index 0000000..328915a --- /dev/null +++ b/data/output/00292757039c4e44293652db911f7c3f9025a713.json @@ -0,0 +1,414 @@ +{ + "file_name": "00292757039c4e44293652db911f7c3f9025a713.txt", + "total_words": 831, + "top_n_words": [ + { + "word": "the", + "count": 64 + }, + { + "word": "of", + "count": 32 + }, + { + "word": 
"and", + "count": 26 + }, + { + "word": "to", + "count": 24 + }, + { + "word": "a", + "count": 20 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "princesses", + "count": 13 + }, + { + "word": "beatrice", + "count": 12 + }, + { + "word": "they", + "count": 12 + }, + { + "word": "hanover", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "After .", + "length": 7 + }, + { + "text": "' The .", + "length": 7 + }, + { + "text": "5 and No.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Sean O'hare .", + "length": 13 + }, + { + "text": "Great Britain .", + "length": 15 + }, + { + "text": "Hanover City Hall.", + "length": 18 + }, + { + "text": "6 in line to the .", + "length": 18 + }, + { + "text": "Cambridge's wedding.", + "length": 20 + }, + { + "text": "Mini tour across Germany.", + "length": 25 + }, + { + "text": "11:58 EST, 18 January 2013 .", + "length": 28 + }, + { + "text": "20:35 EST, 18 January 2013 .", + "length": 28 + }, + { + "text": "blocked off part of the road.", + "length": 29 + }, + { + "text": "bemusement and mild curiosity.", + "length": 30 + }, + { + "text": "They even came in for a gentle .", + "length": 32 + }, + { + "text": "business and students to the UK.", + "length": 32 + }, + { + "text": "for Unicef for €90 000,' it read.", + "length": 35 + }, + { + "text": "journalists with a few German words.", + "length": 36 + }, + { + "text": "Following in the footsteps of their .", + "length": 37 + }, + { + "text": "The royal sisters were in Berlin to .", + "length": 37 + }, + { + "text": "The trip showcased the very best of .", + "length": 37 + }, + { + "text": "It went on to say that Beatrice 'did .", + "length": 38 + }, + { + "text": "a recap of exactly who they were ('No.", + "length": 38 + }, + { + "text": "With photographers and police in tow, .", + "length": 39 + 
}, + { + "text": "But the royals had not embarrassed their .", + "length": 42 + }, + { + "text": "'She sat at the ceremony directly behind .", + "length": 42 + }, + { + "text": "into the safety of the British embassy compound.", + "length": 48 + }, + { + "text": "Brandenburg Gate on their short journey to the British Embassy.", + "length": 63 + }, + { + "text": "the wearing of that Phillip Treacy hat to the Duke and Duchess of .", + "length": 67 + }, + { + "text": "sisters were on the final day of their two-day tour of Germany and .", + "length": 68 + }, + { + "text": "hosts, they had been waved through by German officers who had also .", + "length": 68 + }, + { + "text": "driver Eugenie and her older sister Beatrice then took a swift right .", + "length": 70 + }, + { + "text": "flag before listening to a speech by Mayor of Hanover Stephan Weil at .", + "length": 71 + }, + { + "text": "ribbing from the German press who covered the two-day engagement with .", + "length": 71 + }, + { + "text": "father Prince Andrew - dubbed 'Airmiles Andy' because of all his free .", + "length": 71 + }, + { + "text": "work on the Great Campaign - a global initiative to attract visitors, .", + "length": 71 + }, + { + "text": "Easy does it: Princesses feed the horse a carrot each from their palms .", + "length": 72 + }, + { + "text": "everything to make up for the lack of headgear - and even welcomed the .", + "length": 72 + }, + { + "text": "Queen Elizabeth II, 86, and wore a crazy hat, which was later auctioned .", + "length": 73 + }, + { + "text": "spent it in Hanover where they were met by small crowds waving the Union .", + "length": 74 + }, + { + "text": "throne'), readers were reminded of Beatrice's biggest claim to fame yet - .", + "length": 75 + }, + { + "text": "flights when he was UK trade envoy - the princesses were there to launch the .", + "length": 78 + }, + { + "text": "Welcome to Berlin: Mayor Stephan Weil welcomes Princesses Eugenie and Beatrice .", + "length": 80 + 
}, + { + "text": "Well turned out: Princes Beatrice and Eugenie pose with Premier of Lower Saxony David McAllister .", + "length": 98 + }, + { + "text": "The Princesses raised eyebrows yesterday after they drove the Mini through a red light near Berlin’s .", + "length": 104 + }, + { + "text": "The paper went on to add that they 'charmed their way through Berlin, vigorously beating the drum for their country.", + "length": 116 + }, + { + "text": "The Royal Family have strong ties with Germany as Queen Victoria's mother was German, as was her husband Prince Albert.", + "length": 119 + }, + { + "text": "German press reaction to the trip: 'The Berlin trip of the Merry Wives of Windsor', cried the Berliner Kurier's headline .", + "length": 122 + }, + { + "text": "Drawing a crowd: Princesses Beatrice and Eugenie meet school children on their two day tour of Germany to promote Britain .", + "length": 123 + }, + { + "text": "Presents galore: Princesses Beatrice and Eugenie are presented with gifts by Mayor of Hanover Stephan Weil at Hanover City Hall .", + "length": 129 + }, + { + "text": "They also visited the Equine Clinic at the University of Veterinary Medicine in Hanover where they fed a horse with a heart condition called Ben.", + "length": 145 + }, + { + "text": "Princesses Beatrice and Eugenie's first joint Royal engagement ended successfully today as flag-waving children turned out to greet them in the rain.", + "length": 149 + }, + { + "text": "Smiles all round: Princess: Beatrice and Eugenie of York stand next to Premier of Lower Saxony David McAllister at the opening of Herrenhausen Palace .", + "length": 151 + }, + { + "text": "They were in attendance for the opening of Herrenhausen Palace in Hanover which was rebuilt by the Volkswagen Foundation for 21 million euros after it was destroyed in WW II .", + "length": 175 + }, + { + "text": "'The Berlin trip of the Merry Wives of Windsor', cried the Berliner Kurier's headline, before introducing the princesses as 
'young ladies in heels that looked like school girls'.", + "length": 178 + }, + { + "text": "Horsing around: Princesses Beatrice and Eugenie feed a horse with a heart condition called Ben as they visit the Equine Clinic at the University of Veterinary Medicine in Hanover .", + "length": 180 + }, + { + "text": "' It said they were on a 'patriotic mission' and jokingly remarked how it was strange that the Princess  chose not to wear hats when they were both famous for their 'extravagant hat creations.", + "length": 193 + }, + { + "text": "Princess Eugenie and Princess Beatrice pose for a photo with patient Malte Wassmann (left), aged 12, during a visit to the Teenage Cancer Treatment Unit at the University of Medicine in Hanover .", + "length": 195 + }, + { + "text": "' The Sueddeutsche newspaper referred to the princesses as 'hatless missionaries' of 'quasi-B prominence among the young Royals - after the A-list to the Princes Harry and William and pregnant Kate.", + "length": 199 + }, + { + "text": "British, from culture and technology to music and innovation, and ended with a trip to Hanover today for the re-opening of Schloss Herrenhausen, the city’s palace which has finally been rebuilt after it was destroyed by allied bombardment in 1943.", + "length": 249 + }, + { + "text": "' The Berliner Kurier said the princesses were like 'young ladies in heels that looked like school girls' The Sueddeutsche newspaper said the princesses were of 'quasi-B prominence among the young Royals - after the A-list to the Princes Harry and William and pregnant Kate' Princesses Beatrice and Eugenie of York attend the opening of Herrenhausen Palace in Hanover, Germany .", + "length": 378 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.44773226976394653 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:24.188172198Z", + "first_section_created": "2025-12-23T09:33:24.18847771Z", + "last_section_published": 
"2025-12-23T09:33:24.188756221Z", + "all_results_received": "2025-12-23T09:33:24.258462361Z", + "output_generated": "2025-12-23T09:33:24.258717571Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:24.18847771Z", + "publish_time": "2025-12-23T09:33:24.188756221Z", + "first_worker_start": "2025-12-23T09:33:24.189171137Z", + "last_worker_end": "2025-12-23T09:33:24.257581Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:24.18924184Z", + "start_time": "2025-12-23T09:33:24.189307842Z", + "end_time": "2025-12-23T09:33:24.189419447Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:24.189529Z", + "start_time": "2025-12-23T09:33:24.189688Z", + "end_time": "2025-12-23T09:33:24.257581Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:24.18925374Z", + "start_time": "2025-12-23T09:33:24.189322843Z", + "end_time": "2025-12-23T09:33:24.189455248Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:24.189093834Z", + "start_time": "2025-12-23T09:33:24.189171137Z", + "end_time": "2025-12-23T09:33:24.189205938Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 
67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4973, + "slowest_section_id": 0, + "slowest_section_time_ms": 69 + } +} diff --git a/data/output/00298090b95424d625385fdedecf881de812d801.json b/data/output/00298090b95424d625385fdedecf881de812d801.json new file mode 100644 index 0000000..023b253 --- /dev/null +++ b/data/output/00298090b95424d625385fdedecf881de812d801.json @@ -0,0 +1,250 @@ +{ + "file_name": "00298090b95424d625385fdedecf881de812d801.txt", + "total_words": 580, + "top_n_words": [ + { + "word": "the", + "count": 34 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "of", + "count": 19 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "police", + "count": 14 + }, + { + "word": "officers", + "count": 12 + }, + { + "word": "that", + "count": 12 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "home", + "count": 9 + }, + { + "word": "report", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "More than half of them were still serving at the time.", + "length": 54 + }, + { + "text": "Theresa is trying to tackle these historic issues to restore the public's trust.", + "length": 80 + }, + { + "text": "5 per cent and one per cent of the 200,000 police staff were 'potentially corrupt.", + "length": 82 + }, + { + "text": "'There has been a loss of 
confidence in the police as a result, which is quite dangerous.", + "length": 89 + }, + { + "text": "' It also adds how some officers 'used their powers to obtain money or sexual favours from the public.", + "length": 102 + }, + { + "text": "Critics say the 2003 Home Office report shows why no Home Secretary Theresa May is trying to tackle historical scandals .", + "length": 121 + }, + { + "text": "The 2002 report, produced as part of Operation Tiberius, an investigation into police corruption, named 80 corrupt officers.", + "length": 124 + }, + { + "text": "Critics say this Home Office report shows why now Home Secretary Theresa May is having to deal with historic police scandals.", + "length": 125 + }, + { + "text": "The report states that intelligence over a one year period from some forces involved in their research showed that between 0.", + "length": 125 + }, + { + "text": "The probe comes amid a series of police scandals that have related to recent inquiries involving phone-hacking and the Plebgate scandal.", + "length": 136 + }, + { + "text": "Between 1,000 and 2,000 officers have been suspected of tipping off criminals, stealing and fabricating evidence, says a Home Office report.", + "length": 140 + }, + { + "text": "However, a statement issued by Scotland Yard said said that the Met continued to investigate corruption, and has 'no complacency' about the matter.", + "length": 147 + }, + { + "text": "Earlier this year, an internal investigation at Scotland Yard by the Metropolitan Police claimed some officers helped organised criminals to hide evidence .", + "length": 156 + }, + { + "text": "The Home Office Select Committee will launch an investigation next month into police corruption after claims officers also used their power to get money and sex.", + "length": 161 + }, + { + "text": "' Among the corrupt practices listed by the 2003 report are dealing and using drugs, fraud and domestic violence as well using 'sexist, racist and homophobic behaviour.", 
+ "length": 168 + }, + { + "text": "' It also pointed to raids where suspects could have been tipped off because when officers arrived there was no incriminating evidence and they already 'had the kettle on'.", + "length": 172 + }, + { + "text": "' Earlier this year, it also emerged that dozens of corrupt police officers helped organised criminals hide evidence, intimidate witnesses and access details of ongoing operations.", + "length": 180 + }, + { + "text": "The report also adds that corruption could be taking place at all levels and suggests that police should be dealt with behind closed doors if they commit crimes, to make it 'less damaging'.", + "length": 189 + }, + { + "text": "An internal investigation by the Metropolitan Police claimed officers were bribed to destroy surveillance logs and some officers even co-owned houses and racehorses with suspected gang leaders.", + "length": 193 + }, + { + "text": "Member of the Home Affairs Select Committee and Conservative MP Lorraine Fullbrook told the Independent: 'Labour's kid-gloves treatment of the police bred a lack of accountability which ultimately let down the public.", + "length": 217 + }, + { + "text": "Policing minister Mike Penning told the newspaper: 'The public expect the police to act with honesty and integrity and it is right that the full force of criminal law is available to punish and deter acts of corruption by police officers.", + "length": 238 + }, + { + "text": "' It explains: 'Corrupt activities across these examples have included the protection of criminals for financial payments, the theft and recycling of drugs to criminals, the stealing of money from crime scenes, and the fabrication of evidence to obtain convictions.", + "length": 265 + }, + { + "text": "A Home Office report revealed that the government estimate that at least 2,000 officers of all ranks could be corrupt (file picture) In a Home Office report analysis by researchers revealed that the government estimate that up to 2,000 
officers of all ranks could be comprimising the police by dealing with criminals.", + "length": 317 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7190890908241272 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:24.68953571Z", + "first_section_created": "2025-12-23T09:33:24.689957527Z", + "last_section_published": "2025-12-23T09:33:24.690159935Z", + "all_results_received": "2025-12-23T09:33:24.748884844Z", + "output_generated": "2025-12-23T09:33:24.749063051Z", + "total_processing_time_ms": 59, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:24.689957527Z", + "publish_time": "2025-12-23T09:33:24.690159935Z", + "first_worker_start": "2025-12-23T09:33:24.690751258Z", + "last_worker_end": "2025-12-23T09:33:24.747847Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:24.690682855Z", + "start_time": "2025-12-23T09:33:24.690751258Z", + "end_time": "2025-12-23T09:33:24.69080736Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:24.690961Z", + "start_time": "2025-12-23T09:33:24.6911Z", + "end_time": "2025-12-23T09:33:24.747847Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:24.690716157Z", + "start_time": "2025-12-23T09:33:24.690773159Z", + "end_time": "2025-12-23T09:33:24.690865062Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:24.690705656Z", + "start_time": "2025-12-23T09:33:24.690767659Z", + "end_time": "2025-12-23T09:33:24.69080986Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + 
"worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3598, + "slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/0029837648b848c9cbde4c26744609e85f528a6c.json b/data/output/0029837648b848c9cbde4c26744609e85f528a6c.json new file mode 100644 index 0000000..3272b6f --- /dev/null +++ b/data/output/0029837648b848c9cbde4c26744609e85f528a6c.json @@ -0,0 +1,302 @@ +{ + "file_name": "0029837648b848c9cbde4c26744609e85f528a6c.txt", + "total_words": 554, + "top_n_words": [ + { + "word": "the", + "count": 28 + }, + { + "word": "of", + "count": 21 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "war", + "count": 9 + }, + { + "word": "bucciarelli", + "count": 7 + }, + { + "word": "he", + "count": 7 + }, + { + "word": "on", + "count": 7 + } + ], + "sorted_sentences": [ + 
{ + "text": "\" Social media .", + "length": 16 + }, + { + "text": "You can follow him on Twitter.", + "length": 30 + }, + { + "text": "\"The images in South Sudan are different.", + "length": 41 + }, + { + "text": "\" His return in February was anything but.", + "length": 42 + }, + { + "text": "Months of ethnic violence had left thousands dead, 1.", + "length": 53 + }, + { + "text": "Frantic international pleas for peace have gone unheeded.", + "length": 57 + }, + { + "text": "\"I remember the look on people's faces,\" Bucciarelli said.", + "length": 58 + }, + { + "text": "\"Its apparent stillness leads to hear about it less and less.", + "length": 61 + }, + { + "text": "This time, Bucciarelli found a nation on the brink of an abyss.", + "length": 63 + }, + { + "text": "He described the violence in South Sudan as an \"invisible\" conflict.", + "length": 68 + }, + { + "text": "Machar belongs to the Nuer community, while the President is a Dinka.", + "length": 69 + }, + { + "text": "Follow @CNNPhotos on Twitter to join the conversation about photography.", + "length": 72 + }, + { + "text": "\"It represents one of the longest and most forgotten African wars,\" he said.", + "length": 76 + }, + { + "text": "\"Tired but happy, hopeful for a better future after a past full of pain and war.", + "length": 80 + }, + { + "text": "5 million people displaced and a looming famine, according to the United Nations.", + "length": 81 + }, + { + "text": "Bucciarelli's pictures provide a rare window into a civilian population under siege.", + "length": 84 + }, + { + "text": "Fabio Bucciarelli is an Italian photographer whose work focuses on conflict and war.", + "length": 84 + }, + { + "text": "The recent clashes have been a major setback for a country so desperate for a new start.", + "length": 88 + }, + { + "text": "\"South Sudan's war represents one of the longest and most forgotten African conflicts,\" he said.", + "length": 96 + }, + { + "text": "That was two years ago, after 
the world's youngest country split from its northern nemesis, Sudan.", + "length": 98 + }, + { + "text": "Since then, militia loyal to both have battled each other and targeted civilians from rival tribes.", + "length": 99 + }, + { + "text": "Instead of jubilant faces of hope, he saw defeated faces haunted by a return of ghosts of past wars.", + "length": 100 + }, + { + "text": "He found citizens worn out by decades of war -- but looking forward to an independent, peaceful future.", + "length": 103 + }, + { + "text": "(CNN)When photographer Fabio Bucciarelli first visited South Sudan, he found a nation filled with hope.", + "length": 103 + }, + { + "text": "\"It's hard to find front-line war photography of the kind we've seen in Libya or Syria,\" Bucciarelli said.", + "length": 106 + }, + { + "text": "Given the tragic consequences and repercussions on the population, I find it essential to bring it to light.", + "length": 108 + }, + { + "text": "\"Its tragic consequences are quietly fading away from the headlines, making these pictures even more crucial.", + "length": 109 + }, + { + "text": "Bucciarelli found despair cramped in tent camps, growing sexual violence and the recruitment of child soldiers.", + "length": 111 + }, + { + "text": "South Sudan split from Sudan in 2011 as part of a peace deal that ended decades of war in Africa's largest nation.", + "length": 114 + }, + { + "text": "That war left 2 million people dead and ended with the peace agreement that included an independence referendum for the south.", + "length": 126 + }, + { + "text": "Concerned neighboring nations have stepped in and urged warring parties to sign ceasefire deals, but the weapons have not gone quiet.", + "length": 133 + }, + { + "text": "The conflict has wiped out entire neighborhoods and transformed into a full-blown war between two large tribes, the Nuer and the Dinka.", + "length": 135 + }, + { + "text": "\" The latest conflict erupted in December last year, when President Salva 
Kiir accused his fired deputy, Riek Machar, of an attempted coup.", + "length": 139 + }, + { + "text": "\" His goal, he said, is to tell the stories of people rendered powerless by wars, provide objective images and shed light on human rights issues.", + "length": 145 + }, + { + "text": "Instead of relishing its independence, residents of areas such as Bentiu, Bor, Nyang and Mingkaman are struggling to survive as communities turn against one another.", + "length": 165 + }, + { + "text": "They describe visually the effect of the impending war, lending you a view of the civilian and military populations and providing a glimpse of life inside a murky, not clearly defined conflict.", + "length": 193 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.3644506335258484 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:25.190926424Z", + "first_section_created": "2025-12-23T09:33:25.191292338Z", + "last_section_published": "2025-12-23T09:33:25.191535848Z", + "all_results_received": "2025-12-23T09:33:25.258199369Z", + "output_generated": "2025-12-23T09:33:25.258398476Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:25.191292338Z", + "publish_time": "2025-12-23T09:33:25.191535848Z", + "first_worker_start": "2025-12-23T09:33:25.191992966Z", + "last_worker_end": "2025-12-23T09:33:25.257361Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:25.192000866Z", + "start_time": "2025-12-23T09:33:25.192085769Z", + "end_time": "2025-12-23T09:33:25.192160872Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:25.19224Z", + "start_time": "2025-12-23T09:33:25.192375Z", + 
"end_time": "2025-12-23T09:33:25.257361Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:25.191967064Z", + "start_time": "2025-12-23T09:33:25.192064468Z", + "end_time": "2025-12-23T09:33:25.192179973Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:25.191907862Z", + "start_time": "2025-12-23T09:33:25.191992966Z", + "end_time": "2025-12-23T09:33:25.192021067Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3336, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/002990836902c3de5b3545a83c7d53caee0dc0cb.json b/data/output/002990836902c3de5b3545a83c7d53caee0dc0cb.json new file mode 100644 index 0000000..4dc4a87 --- 
/dev/null +++ b/data/output/002990836902c3de5b3545a83c7d53caee0dc0cb.json @@ -0,0 +1,350 @@ +{ + "file_name": "002990836902c3de5b3545a83c7d53caee0dc0cb.txt", + "total_words": 794, + "top_n_words": [ + { + "word": "a", + "count": 33 + }, + { + "word": "she", + "count": 29 + }, + { + "word": "i", + "count": 25 + }, + { + "word": "to", + "count": 19 + }, + { + "word": "steph", + "count": 17 + }, + { + "word": "the", + "count": 17 + }, + { + "word": "as", + "count": 16 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "but", + "count": 13 + }, + { + "word": "her", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "'It might seem strange.", + "length": 23 + }, + { + "text": "I was a three-year-old boy.", + "length": 27 + }, + { + "text": "Steph said: 'I feel complete.", + "length": 29 + }, + { + "text": "My wife or life as a real woman.", + "length": 32 + }, + { + "text": "Relationships were few and far between.", + "length": 39 + }, + { + "text": "'But I knew I couldn't keep living a lie.", + "length": 41 + }, + { + "text": "Steph is now happier than she has ever been.", + "length": 44 + }, + { + "text": "'I had to make the hardest choice of my life.", + "length": 45 + }, + { + "text": "She has even met a woman and they are engaged .", + "length": 47 + }, + { + "text": "' She said: 'That was when I grew my hair long.", + "length": 47 + }, + { + "text": "'I hadn't even asked for them, she knew me so well.", + "length": 51 + }, + { + "text": "But she never told anyone, not even her closest family.", + "length": 55 + }, + { + "text": "Steph used to dress as a woman even before her operation.", + "length": 57 + }, + { + "text": "They are engaged but have not yet set a date for the wedding.", + "length": 61 + }, + { + "text": "'I stopped wearing girls' clothes and tried to lock away the urges.", + "length": 67 + }, + { + "text": "However, a few years later she finally realised she was transsexual.", + "length": 68 + }, + { + "text": "'My father also 
turned a blind eye as I danced around in my new outfits.", + "length": 72 + }, + { + "text": "' 'I've got a wonderful partner and the body I should have been born into.", + "length": 74 + }, + { + "text": "Steph Holmes, 62,  from Lancashire, rang in the new year as a woman (right).", + "length": 77 + }, + { + "text": "' This new attitude was a success and Steph went on to get married in her 40s.", + "length": 78 + }, + { + "text": "Steph with his mother Lilian before he underwent gender reassignment surgery .", + "length": 78 + }, + { + "text": "It comes 59 years after the retired IT teacher started wearing girls' clothes.", + "length": 78 + }, + { + "text": "Now Steph is finally living openly as a woman after having the surgery last April.", + "length": 82 + }, + { + "text": "I just wish mum could see me like this, but sadly she passed away a few years ago.", + "length": 82 + }, + { + "text": "So in March 2010, she visited doctors to undergo full gender reassignment surgery.", + "length": 82 + }, + { + "text": "But I'd always been a bit different - I preferred pink to blue and dolls to toy cars.", + "length": 85 + }, + { + "text": "She said: 'When I went to an all boys high school that I noticed how different I was.", + "length": 85 + }, + { + "text": "' After quitting the army aged 18, Steph, who was then still Stephen, grew his hair long .", + "length": 90 + }, + { + "text": "'Mum loved it, often coming round to braid my hair, she called me \"the daughter I never had\".", + "length": 93 + }, + { + "text": "She has also set up a support group for other women called Chrysalis Transsexual Support Group.", + "length": 95 + }, + { + "text": "'But mum couldn't care less and when we went shopping she'd let me choose the clothes I wanted.", + "length": 95 + }, + { + "text": "Here she is, right, walking along Douglas seafront in the Isle of Man in 1962 with her mother, left.", + "length": 100 + }, + { + "text": "Steph Holmes rang in 2015 as a fully-fledged woman – 59 
years after she started dressing as a girl.", + "length": 101 + }, + { + "text": "She said: 'I decided to tell my wife my dream of becoming a woman and she said, \"I can't support you\".", + "length": 102 + }, + { + "text": "Steph said: 'From then on, I decided to be honest with future girlfriends and confessed on first dates.", + "length": 103 + }, + { + "text": "' Then, at the age of 19 Steph lost her virginity but it did not feel right, so she did not do it again.", + "length": 104 + }, + { + "text": "Steph as Stephen - as a young man, he tried desperately to fight his urges and engaged in manly hobbies .", + "length": 105 + }, + { + "text": "' Steph, pictured recovering from gender reassignment surgery, told her wife she needed to live as a woman .", + "length": 108 + }, + { + "text": "Steph said: 'My journey began when mum Lilian gave me a pair of black high heels for Christmas, 59 years ago.", + "length": 109 + }, + { + "text": "' It wasn't until Steph started senior school that she began to realise she was different to most boys her age.", + "length": 111 + }, + { + "text": "In 2012, she met a a 58-year-old woman called Fran, 58, from Stockport and the two women started a relationship.", + "length": 112 + }, + { + "text": "In her late 20s, she met a woman and wore her clothes in secret but when the lady found out, their romance ended.", + "length": 113 + }, + { + "text": "'At primary school I had to wear a boy's uniform but as soon as I got through the front door though, I'd change into something pretty.", + "length": 134 + }, + { + "text": "Keen to fight her natural urges, she even joined the army at 18 but quit two months later after realising she couldn't go through with it.", + "length": 138 + }, + { + "text": "New Year's Eve has always been a time for fresh beginnings, but for one 62-year-old from Darwen, Lancashire, midnight on December 31 was especially sweet.", + "length": 154 + }, + { + "text": "During her childhood as a boy called Stephen, Steph 
wore pretty dresses and always favoured pink over blue but when she hit her teenage years she reverted to boys' clothes.", + "length": 172 + }, + { + "text": "As a boy, his mother bought his first pair of heels aged 3 (left) For years afterwards, she secretly dressed as a woman, coming home from work and throwing on a dress and heels.", + "length": 177 + }, + { + "text": "Steph, who is a retired computer sciences teacher, was handed strappy black heels as a Christmas present when she was just three years old, but only built up the courage to have full gender reassignment surgery earlier last year.", + "length": 231 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.47181236743927 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:25.692306037Z", + "first_section_created": "2025-12-23T09:33:25.693595387Z", + "last_section_published": "2025-12-23T09:33:25.693797795Z", + "all_results_received": "2025-12-23T09:33:25.757061783Z", + "output_generated": "2025-12-23T09:33:25.757219189Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:25.693595387Z", + "publish_time": "2025-12-23T09:33:25.693797795Z", + "first_worker_start": "2025-12-23T09:33:25.694285714Z", + "last_worker_end": "2025-12-23T09:33:25.756048Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:25.694222912Z", + "start_time": "2025-12-23T09:33:25.694309215Z", + "end_time": "2025-12-23T09:33:25.694441521Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:25.694539Z", + "start_time": "2025-12-23T09:33:25.694695Z", + "end_time": "2025-12-23T09:33:25.756048Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 
61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:25.694216012Z", + "start_time": "2025-12-23T09:33:25.694285714Z", + "end_time": "2025-12-23T09:33:25.694375418Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:25.694230012Z", + "start_time": "2025-12-23T09:33:25.694298515Z", + "end_time": "2025-12-23T09:33:25.694363717Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4194, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/002a07b21f76ea8993987dabf3663897b442c533.json b/data/output/002a07b21f76ea8993987dabf3663897b442c533.json new file mode 100644 index 0000000..1370087 --- /dev/null +++ b/data/output/002a07b21f76ea8993987dabf3663897b442c533.json @@ -0,0 +1,262 @@ +{ + 
"file_name": "002a07b21f76ea8993987dabf3663897b442c533.txt", + "total_words": 450, + "top_n_words": [ + { + "word": "the", + "count": 28 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "of", + "count": 13 + }, + { + "word": "her", + "count": 7 + }, + { + "word": "in", + "count": 7 + }, + { + "word": "kuban", + "count": 7 + }, + { + "word": "men", + "count": 7 + }, + { + "word": "on", + "count": 7 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "according", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Katie Davies .", + "length": 14 + }, + { + "text": "00:16 EST, 30 March 2013 .", + "length": 26 + }, + { + "text": "10:01 EST, 30 March 2013 .", + "length": 26 + }, + { + "text": "Kuban is accused of posing as his ex in email exchanges with the 'visitors'.", + "length": 76 + }, + { + "text": "' Kuban's work computer was seized by investigators as part of their search for evidence.", + "length": 89 + }, + { + "text": "In exchanges through the site they were told to park by the woman's mailbox and she would meet them.", + "length": 100 + }, + { + "text": "He wrote he was a 'senior lady' seeking to 'meet a gentleman in his 50s that can give me some pleasuring.", + "length": 105 + }, + { + "text": "Police claim the harassment began when the woman brought her six month relationship with Kuban to an end.", + "length": 105 + }, + { + "text": "He was arrested on Friday and is next due in court on April 1, if convicted he faces up to five years in prison.", + "length": 112 + }, + { + "text": "The woman previously attempted to get restraining orders against Kuban over the Craigslist posts, according to officers.", + "length": 120 + }, + { + "text": "He is accused of directing men to the 64-year-old woman's Virginia home, posting pictures of her and details of her 
address.", + "length": 124 + }, + { + "text": "He works as a preservationist at the Library of Congress’s facility in Culpeper, Virginia, according to the police report.", + "length": 124 + }, + { + "text": "It added: '[The woman] spends a good portion of her time searching Craigslist and ‘flagging’ these ads as inappropriate.", + "length": 124 + }, + { + "text": "Police also reported being called out on multiple occasions to 'chase away the men who had been enticed by these personal ads'.", + "length": 127 + }, + { + "text": "' The harassment got so bad the woman was forced to install security fences and cameras to ward the men off, according to officers.", + "length": 131 + }, + { + "text": "According to police, she was soon inundated with men - some from out of state - looking for the 'senior lady' mentioned in the post.", + "length": 132 + }, + { + "text": "The unidentified victim also put signs up outside her house warning any 'visitors' they were trespassing if they were responding to Craigslist adverts.", + "length": 151 + }, + { + "text": "Accused: According to reports Kenneth Kuban, pictured, is facing charges of stalking after allegedly posting sex adverts for his ex-girlfriend on Craigslist .", + "length": 158 + }, + { + "text": "The police report states the ads led to 'random men, often traveling from other States, to appear at [the victim's] property, looking to have sexual relations with her'.", + "length": 169 + }, + { + "text": "Kenneth Kuban, 61, an employee at the Library of Congress, has been accused of felony stalking after posting the 'Casual Encounters' adverts on the website, according to Smoking Gun.", + "length": 182 + }, + { + "text": "A spurned lover is facing jail after being charged for taking revenge on his ex by putting out sex adverts for her address on Craigslist - relentlessly attracting dozens of men from across the country.", + "length": 201 + }, + { + "text": "Craigslist: Kuban is accused of posting the adverts from a 'senior 
lady' looking for sex (FILE PIC) She reported the incidents to police and undercover officers posed as interested men to investigate the advert source.", + "length": 218 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7186667919158936 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:26.194306874Z", + "first_section_created": "2025-12-23T09:33:26.194644787Z", + "last_section_published": "2025-12-23T09:33:26.194852195Z", + "all_results_received": "2025-12-23T09:33:26.263729604Z", + "output_generated": "2025-12-23T09:33:26.26389871Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:26.194644787Z", + "publish_time": "2025-12-23T09:33:26.194852195Z", + "first_worker_start": "2025-12-23T09:33:26.195310513Z", + "last_worker_end": "2025-12-23T09:33:26.262797Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:26.195345115Z", + "start_time": "2025-12-23T09:33:26.195409317Z", + "end_time": "2025-12-23T09:33:26.19547392Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:26.195646Z", + "start_time": "2025-12-23T09:33:26.195811Z", + "end_time": "2025-12-23T09:33:26.262797Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:26.195383316Z", + "start_time": "2025-12-23T09:33:26.195449119Z", + "end_time": "2025-12-23T09:33:26.195513621Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:26.195253111Z", + "start_time": "2025-12-23T09:33:26.195310513Z", + "end_time": "2025-12-23T09:33:26.195344115Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2666, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/002a083c3893b1fde734280b9eec28d428a02d2b.json b/data/output/002a083c3893b1fde734280b9eec28d428a02d2b.json new file mode 100644 index 0000000..a6cc2cc --- /dev/null +++ b/data/output/002a083c3893b1fde734280b9eec28d428a02d2b.json @@ -0,0 +1,214 @@ +{ + "file_name": "002a083c3893b1fde734280b9eec28d428a02d2b.txt", + "total_words": 226, + "top_n_words": [ + { + "word": "the", + "count": 16 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "space", + "count": 7 + }, + { + "word": "colbert", + "count": 5 + }, + { + "word": "of", + "count": 5 + }, + { + "word": "a", + "count": 4 + }, + { + "word": "and", + "count": 4 + }, + { + "word": "discovery", + "count": 4 + }, + { + "word": "from", + "count": 4 
+ }, + { + "word": "is", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "iReport.", + "length": 8 + }, + { + "text": "Watch shuttle launch » .", + "length": 25 + }, + { + "text": "com: Discovery lights night sky .", + "length": 33 + }, + { + "text": "So it is an essential part,\" Coleman said.", + "length": 42 + }, + { + "text": "The new compartment was given the name Tranquility.", + "length": 51 + }, + { + "text": "The crew of seven astronauts includes one from Mexico and another from Sweden.", + "length": 78 + }, + { + "text": "NASA astronaut Cady Coleman said the treadmill is an essential addition to the space station.", + "length": 93 + }, + { + "text": "\"We have the treadmill now to keep them healthy, which is really part of being able to come home in one piece.", + "length": 110 + }, + { + "text": "Space shuttle Discovery lifts off late Friday from Kennedy Space Center in a photo from iReporter Alan Walters.", + "length": 111 + }, + { + "text": "(CNN) -- Space shuttle Discovery launched just before midnight Friday on a mission to the international space station.", + "length": 119 + }, + { + "text": "One of those seven, Nicole Stott, will remain on the station as a flight engineer, while astronaut Timothy Kopra is to return home aboard the shuttle.", + "length": 150 + }, + { + "text": "\" Colbert won an online poll conducted by NASA to name the newest space station compartment, but Colbert and the space agency compromised to give the moniker to the treadmill.", + "length": 175 + }, + { + "text": "Discovery's liftoff, originally set for Tuesday, had been postponed three times -- first for bad weather, and twice more while mission managers checked out indications of a faulty valve.", + "length": 186 + }, + { + "text": "Also on board: The Leonardo logistics module, science experiments and the Combined Operational Load Bearing External Resistance Treadmill (COLBERT), named for fake newsman Stephen Colbert of Comedy Central's \"The Colbert Report.", 
+ "length": 228 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.724963903427124 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:26.695621284Z", + "first_section_created": "2025-12-23T09:33:26.697480458Z", + "last_section_published": "2025-12-23T09:33:26.697650164Z", + "all_results_received": "2025-12-23T09:33:26.756057061Z", + "output_generated": "2025-12-23T09:33:26.756179465Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:26.697480458Z", + "publish_time": "2025-12-23T09:33:26.697650164Z", + "first_worker_start": "2025-12-23T09:33:26.698173585Z", + "last_worker_end": "2025-12-23T09:33:26.755128Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:26.698186785Z", + "start_time": "2025-12-23T09:33:26.698239387Z", + "end_time": "2025-12-23T09:33:26.698275489Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:26.698397Z", + "start_time": "2025-12-23T09:33:26.69854Z", + "end_time": "2025-12-23T09:33:26.755128Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:26.698112682Z", + "start_time": "2025-12-23T09:33:26.698173585Z", + "end_time": "2025-12-23T09:33:26.698206986Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:26.698141684Z", + "start_time": "2025-12-23T09:33:26.698182285Z", + "end_time": "2025-12-23T09:33:26.698193786Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + 
"total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1421, + "slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/002a48549068d7db596738c4f3fa2f5562372ab3.json b/data/output/002a48549068d7db596738c4f3fa2f5562372ab3.json new file mode 100644 index 0000000..27b1da5 --- /dev/null +++ b/data/output/002a48549068d7db596738c4f3fa2f5562372ab3.json @@ -0,0 +1,246 @@ +{ + "file_name": "002a48549068d7db596738c4f3fa2f5562372ab3.txt", + "total_words": 282, + "top_n_words": [ + { + "word": "the", + "count": 21 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "and", + "count": 6 + }, + { + "word": "s", + "count": 5 + }, + { + "word": "to", + "count": 5 + }, + { + "word": "a", + "count": 4 + }, + { + "word": "above", + "count": 4 + }, + { + "word": "people", + "count": 4 + }, + { + "word": "see", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "Poland's winter wonderland .", + "length": 28 + }, + { + "text": 
"Tornado strikes near Auckland .", + "length": 31 + }, + { + "text": "Hard to see in Chinese province .", + "length": 33 + }, + { + "text": "Flooding in Argentina's capital .", + "length": 33 + }, + { + "text": "Unlikely typhoon in the Philippines .", + "length": 37 + }, + { + "text": "About 150 homes were left without power.", + "length": 40 + }, + { + "text": "Typhoons are uncommon in the Bopha region.", + "length": 42 + }, + { + "text": "Check out the fog in the video above from CCTV.", + "length": 47 + }, + { + "text": "See the snow in the video above, courtesy of TVN.", + "length": 49 + }, + { + "text": "See some of the most serious flooding in the video above.", + "length": 57 + }, + { + "text": "In some areas, visibility was reduced to less than 200 meters.", + "length": 62 + }, + { + "text": "Seven centimeters of snow fell in the city of Lublin on Monday.", + "length": 63 + }, + { + "text": "Watch the video above to see how the storm knocked down power lines.", + "length": 68 + }, + { + "text": "In the nearby town of Bialystok, nine cars collided, causing one injury.", + "length": 72 + }, + { + "text": "The snow brought with it temperatures of minus 1 degree Celsius (30 degrees Fahrenheit).", + "length": 88 + }, + { + "text": "At least 148 people have died and thousands of homes have been destroyed, according to TV5.", + "length": 91 + }, + { + "text": "Typhoon Bopha devastated the Compostela Valley region in the southern Philippines early this week.", + "length": 98 + }, + { + "text": "Dense fog in the province of Sichuan caused heavy traffic and temporary highway closures in southwestern China.", + "length": 111 + }, + { + "text": "(CNN) -- This past week saw severe weather in many parts of the world that took dozens of lives and left behind serious damage.", + "length": 127 + }, + { + "text": "Here's a look at some of the extreme weather stories covered by CNN's global affiliates, including a typhoon in the Philippines and a tornado in New Zealand.", + 
"length": 157 + }, + { + "text": "Heavy rains in the Argentinian capital of Buenos Aires left two people dead, forced evacuations and flooded nearly 9 million acres of farmland, Canal 9 said.", + "length": 157 + }, + { + "text": "A tornado ripped through the outskirts of Auckland, New Zealand's largest city, killing three people and leaving more than 200 people injured, according to TVNZ.", + "length": 161 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.9221964478492737 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:27.198402553Z", + "first_section_created": "2025-12-23T09:33:27.198695164Z", + "last_section_published": "2025-12-23T09:33:27.198909773Z", + "all_results_received": "2025-12-23T09:33:27.260463793Z", + "output_generated": "2025-12-23T09:33:27.260585497Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:27.198695164Z", + "publish_time": "2025-12-23T09:33:27.198909773Z", + "first_worker_start": "2025-12-23T09:33:27.199393292Z", + "last_worker_end": "2025-12-23T09:33:27.258343Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:27.199476495Z", + "start_time": "2025-12-23T09:33:27.199543197Z", + "end_time": "2025-12-23T09:33:27.199589099Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:27.199739Z", + "start_time": "2025-12-23T09:33:27.199876Z", + "end_time": "2025-12-23T09:33:27.258343Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:27.199380491Z", + "start_time": "2025-12-23T09:33:27.199446894Z", + "end_time": "2025-12-23T09:33:27.199508696Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:27.199327889Z", + "start_time": "2025-12-23T09:33:27.199393292Z", + "end_time": "2025-12-23T09:33:27.199417692Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1673, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git a/data/output/002a6553123aa9c80a49b1e6e54c5a684975e452.json b/data/output/002a6553123aa9c80a49b1e6e54c5a684975e452.json new file mode 100644 index 0000000..4aa8e8e --- /dev/null +++ b/data/output/002a6553123aa9c80a49b1e6e54c5a684975e452.json @@ -0,0 +1,234 @@ +{ + "file_name": "002a6553123aa9c80a49b1e6e54c5a684975e452.txt", + "total_words": 322, + "top_n_words": [ + { + "word": "the", + "count": 12 + }, + { + "word": "of", + "count": 11 + }, + { + "word": 
"mubarak", + "count": 10 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "he", + "count": 6 + }, + { + "word": "were", + "count": 6 + }, + { + "word": "charges", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "He made his first court appearance in May.", + "length": 42 + }, + { + "text": "Journalist Adam Makary contributed to this report.", + "length": 50 + }, + { + "text": "Mubarak has been held since his guilty verdict last year.", + "length": 57 + }, + { + "text": "Mubarak and the other defendants have pleaded not guilty to all charges.", + "length": 72 + }, + { + "text": "He also faces a corruption charge that was not part of the original trial.", + "length": 74 + }, + { + "text": "He is also charged with seizing public funds and misusing political influence.", + "length": 78 + }, + { + "text": "After appealing their convictions, they were granted a new trial early this year.", + "length": 81 + }, + { + "text": "Mubarak's health has been a bone of contention during his trial and incarceration.", + "length": 82 + }, + { + "text": "The retrial has been adjourned until August 17, the prosecution said in a statement.", + "length": 84 + }, + { + "text": "Mubarak was forced from office in February 2011 following two weeks of mass street protests.", + "length": 92 + }, + { + "text": "Mubarak is accused of selling natural gas to neighboring Israel for prices below fair market value.", + "length": 99 + }, + { + "text": "The ousted autocratic leader spent three decades in charge of Egypt, the most populous Arab country.", + "length": 100 + }, + { + "text": "Mubarak was present in court Saturday for the fourth session of the retrial, wearing his signature sunglasses.", + "length": 110 + }, + { + "text": "A public prosecutor sent Mubarak -- who spent months detained in a military hospital -- back to prison in April.", + "length": 112 + }, 
+ { + "text": "He suffered a heart attack after relinquishing power and had maintained that he was physically unfit to stand trial.", + "length": 116 + }, + { + "text": "But they, too, were ordered to be retried after President Mohamed Morsy, himself ousted this week, ordered a new investigation last year.", + "length": 137 + }, + { + "text": "Six of Mubarak's former security aides last year were acquitted of charges related to the killings, and Mubarak's two sons -- Gamal and Alaa -- were acquitted of corruption charges.", + "length": 181 + }, + { + "text": "Following a lengthy trial, he and his former interior minister, Habib al-Adly, were found guilty and sentenced to life in prison last year on charges that they were complicit in the protesters' killings.", + "length": 203 + }, + { + "text": "(CNN) -- The retrial of Egypt's former president, Hosni Mubarak, on charges of killing protesters during the country's 2011 revolution and profiteering resumed Saturday before being adjourned until next month.", + "length": 209 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8325613141059875 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:27.699696662Z", + "first_section_created": "2025-12-23T09:33:27.699979573Z", + "last_section_published": "2025-12-23T09:33:27.700255284Z", + "all_results_received": "2025-12-23T09:33:27.759698721Z", + "output_generated": "2025-12-23T09:33:27.759828626Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:27.699979573Z", + "publish_time": "2025-12-23T09:33:27.700255284Z", + "first_worker_start": "2025-12-23T09:33:27.700703202Z", + "last_worker_end": "2025-12-23T09:33:27.758764Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": 
"2025-12-23T09:33:27.700696202Z", + "start_time": "2025-12-23T09:33:27.700768304Z", + "end_time": "2025-12-23T09:33:27.700813306Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:27.70094Z", + "start_time": "2025-12-23T09:33:27.701063Z", + "end_time": "2025-12-23T09:33:27.758764Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:27.700716902Z", + "start_time": "2025-12-23T09:33:27.700782605Z", + "end_time": "2025-12-23T09:33:27.700834807Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:27.7006527Z", + "start_time": "2025-12-23T09:33:27.700703202Z", + "end_time": "2025-12-23T09:33:27.700720402Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1997, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/002a782196870886420684c7d1e8df561a307bd2.json b/data/output/002a782196870886420684c7d1e8df561a307bd2.json new file mode 100644 index 0000000..3dd3333 --- /dev/null +++ b/data/output/002a782196870886420684c7d1e8df561a307bd2.json @@ -0,0 +1,318 @@ +{ + "file_name": "002a782196870886420684c7d1e8df561a307bd2.txt", + "total_words": 564, + "top_n_words": [ + { + "word": "the", + "count": 37 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "are", + "count": 10 + }, + { + "word": "it", + "count": 10 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "hair", + "count": 9 + }, + { + "word": "her", + "count": 9 + }, + { + "word": "in", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "1.", + "length": 2 + }, + { + "text": "The U.", + "length": 6 + }, + { + "text": "We are the military.", + "length": 20 + }, + { + "text": "Wigs are also common.", + "length": 21 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Commenting on the case Lt.", + "length": 26 + }, + { + "text": "It’s not on an individual basis.", + "length": 34 + }, + { + "text": "' 'I do think that it's a race issue.", + "length": 37 + }, + { + "text": "Dreadlocks however are still forbidden.", + "length": 39 + }, + { + "text": "Dreadlocks however are still forbidden .", + "length": 40 + }, + { + "text": "She refused and was honorably discharged .", + "length": 42 + }, + { + "text": "However her commanding officers disagreed.", + "length": 42 + }, + { + "text": "The 'two strand twist' was added to the list.", + "length": 45 + }, + { + "text": "' Changes: There are now 19 approved Navy hairstyles.", + "length": 53 + }, + { + "text": "Sims says her locks are narrow 
enough to be in keeping with the rule.", + "length": 69 + }, + { + "text": "Now the Navy has put a hold on the case to review the circumstances.", + "length": 69 + }, + { + "text": "This week the Navy updated regulations extending to 19 approved hairstyles.", + "length": 76 + }, + { + "text": "Senior officers then asked her to straighten her hair and tie it up tighter.", + "length": 76 + }, + { + "text": "Senior officers then asked her to straighten her hair and tie it up tighter.", + "length": 76 + }, + { + "text": "Jessica Sims, who had been serving for 12 years, wore dreadlocks in a small bun.", + "length": 80 + }, + { + "text": "She refused and, as a result, was honorably discharged for 'serious misconduct'.", + "length": 80 + }, + { + "text": "'Hair (including bun) is not to protrude from the opening in the back of the ball cap.", + "length": 86 + }, + { + "text": "'Appropriateness of a hairstyle shall be evaluated by its appearance when headgear is worn.", + "length": 91 + }, + { + "text": "Controversial: Jessica Sims, who had been serving for 12 years, wore dreadlocks in a small bun.", + "length": 95 + }, + { + "text": "Speaking to the site, Sims insisted that as she tied her hair back she fit in with regulations.", + "length": 95 + }, + { + "text": "'Hairstyles will not interfere with the proper wearing of headgear, protective masks or equipment.", + "length": 98 + }, + { + "text": "Navy regulations specifically ban 'widely spaced individual hanging locks,' according to section 2201.", + "length": 102 + }, + { + "text": "It included a new style - the two strand twist - and increased the bun size from two inches in diameter to four.", + "length": 112 + }, + { + "text": "Stephanie Homick, Spokesperson for the Chief of Naval Personnel, said: 'We hold every sailor to the same standard.", + "length": 114 + }, + { + "text": "They are unauthorized along with ponytails, pigtails and braids that are 'widely spaced and/or protrude from the head.", + "length": 118 + 
}, + { + "text": "All headgear shall fit snugly and comfortably around the largest part of the head without distortion or excessive gaps.", + "length": 119 + }, + { + "text": "Navy has altered its hair guidelines following controversy that saw a woman kicked out for refusing to change her style.", + "length": 120 + }, + { + "text": "The majority of the hairstyles that have the strictest regulations are hairstyles that black women would wear' she added.", + "length": 121 + }, + { + "text": "'When headgear is worn, hair shall not show from under the front of the headgear, with the exception of the Tiara and Beret.", + "length": 124 + }, + { + "text": "The first line of the recently updated rules state: 'Hairstyles and haircuts shall present a professional and balanced appearance.", + "length": 130 + }, + { + "text": "Updating regulations: The Navy has this week altered its guidelines which states a bun can be three inches from a servicewomen's scalp .", + "length": 136 + }, + { + "text": "' In an interview with the Navy Times, Sims argued that she had been wearing the same style for the majority of her career and was not sure why she was being asked to change it now\\ .", + "length": 183 + }, + { + "text": "She said: 'I don't think I should be told that I have to straighten my hair in order to be within what they think the regulations are, and I don't think I should have to cover it up with a wig.", + "length": 193 + }, + { + "text": "' Navy regulations state that hair can't protrude more than two inches from the head, so in order to make it lie flat, black women typically weave their hair into tight braids, cornrow it or straighten it using hot tools or chemical relaxers.", + "length": 242 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.707234799861908 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:28.201036774Z", + "first_section_created": "2025-12-23T09:33:28.201329985Z", + "last_section_published": 
"2025-12-23T09:33:28.201516693Z", + "all_results_received": "2025-12-23T09:33:28.2678304Z", + "output_generated": "2025-12-23T09:33:28.268055709Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:28.201329985Z", + "publish_time": "2025-12-23T09:33:28.201516693Z", + "first_worker_start": "2025-12-23T09:33:28.202126017Z", + "last_worker_end": "2025-12-23T09:33:28.266995Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:28.202092315Z", + "start_time": "2025-12-23T09:33:28.202172318Z", + "end_time": "2025-12-23T09:33:28.202233121Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:28.202339Z", + "start_time": "2025-12-23T09:33:28.202488Z", + "end_time": "2025-12-23T09:33:28.266995Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:28.202108116Z", + "start_time": "2025-12-23T09:33:28.202179219Z", + "end_time": "2025-12-23T09:33:28.202259522Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:28.202037013Z", + "start_time": "2025-12-23T09:33:28.202126017Z", + "end_time": "2025-12-23T09:33:28.202169618Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + 
"total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3271, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/002a8815af8e1aa7b34963af61ad9176b1f09a7d.json b/data/output/002a8815af8e1aa7b34963af61ad9176b1f09a7d.json new file mode 100644 index 0000000..92a5165 --- /dev/null +++ b/data/output/002a8815af8e1aa7b34963af61ad9176b1f09a7d.json @@ -0,0 +1,416 @@ +{ + "file_name": "002a8815af8e1aa7b34963af61ad9176b1f09a7d.txt", + "total_words": 950, + "top_n_words": [ + { + "word": "the", + "count": 47 + }, + { + "word": "and", + "count": 39 + }, + { + "word": "in", + "count": 34 + }, + { + "word": "a", + "count": 23 + }, + { + "word": "his", + "count": 23 + }, + { + "word": "to", + "count": 19 + }, + { + "word": "was", + "count": 18 + }, + { + "word": "he", + "count": 17 + }, + { + "word": "of", + "count": 17 + }, + { + "word": "parkinson", + "count": 17 + } + ], + "sorted_sentences": [ + { + "text": "She .", + "length": 5 + }, + { + "text": "pain.", + "length": 5 + }, + { + "text": "'The .", + "length": 6 + }, + { + "text": "the community.", + "length": 14 + }, + { + "text": "the house’s panic room.", + "length": 25 + }, + { + "text": "'They too are telling us .", + "length": 26 + }, + { + "text": "wife hid in the strong room.", + 
"length": 28 + }, + { + "text": "He was a very well-liked man.", + "length": 29 + }, + { + "text": "Lt Co Parkinson, from Brighton.", + "length": 31 + }, + { + "text": "It was this decision which saved her.", + "length": 37 + }, + { + "text": "skills of many in the surrounding community.", + "length": 44 + }, + { + "text": "They have two daughters, Anna, 33 and Renate, 29.", + "length": 49 + }, + { + "text": "He was awarded an OBE in 1998 for his military service.", + "length": 55 + }, + { + "text": "Mrs Parkinson 52, managed to escape and barricade herself in .", + "length": 62 + }, + { + "text": "came out and found her bleeding husband and contacted the police.", + "length": 65 + }, + { + "text": "’ Sorry we are not currently accepting comments on this article.", + "length": 66 + }, + { + "text": "It was very romantic and it was a very personal time for both of us.", + "length": 68 + }, + { + "text": "that his loss is also their loss and that they too are sharing our .", + "length": 68 + }, + { + "text": "dynamic, doing an amazing job in Lolldaiga for both conservation and .", + "length": 70 + }, + { + "text": "' Despite her husband's death, Mrs Parkinson said she intends to remain in Kenya.", + "length": 81 + }, + { + "text": "couple sold their home in Hassocks, West Sussex, for £500,000 three years later.", + "length": 81 + }, + { + "text": "Lt Col Parkinson took up the post with Lolldaiga Hills at the start of this year.", + "length": 81 + }, + { + "text": "The Duchess said: ‘He’s a true romantic and we had a wonderful holiday in Africa.", + "length": 85 + }, + { + "text": "Grief: Friends of Lt Col Parkinson at his farm are coming to terms with his brutal murder .", + "length": 91 + }, + { + "text": "Mrs Parkinson was tied up and threatened and her husband attacked as he fought to defend her.", + "length": 93 + }, + { + "text": "Two men are expected to face trial for that attack, but two other raiders had not been traced.", + "length": 94 + }, + { + "text": 
"On his last tour before retiring, he commanded the British Army training organisation in Nairobi.", + "length": 97 + }, + { + "text": "In 2010, he and Kate holidayed there, staying in a secluded rustic log cabin where William proposed.", + "length": 100 + }, + { + "text": "Crime scene: David Parkinson's house in Nanyuki, Laikipia where robbers raided his home and killed him .", + "length": 104 + }, + { + "text": "The couple were asleep when the robbers smashed their way into their bedroom  and demanded money and guns.", + "length": 107 + }, + { + "text": "’ Lt Col Parkinson joined the Army in 1973 after graduating from the Royal Military Academy at Sandhurst.", + "length": 107 + }, + { + "text": "’ Lt Col Parkinson, who served in the Army for 30 years, became manager of the ranch after leaving Lewa in 2008.", + "length": 114 + }, + { + "text": "Mrs Parkinson, who owns a local arts and crafts company called Mitumba Art, was taken to hospital and treated for shock.", + "length": 120 + }, + { + "text": "East Sussex, was formerly a director of Lewa Wildlife Conservancy in Kenya, where Prince William spent part of his gap year.", + "length": 124 + }, + { + "text": "Police believe he may have been knocked unconscious before being struck with broad-bladed machetes, known locally as pangas.", + "length": 124 + }, + { + "text": "Lt Col Parkinson is believed to have bled to death during the raid in which the armed gang stole a laptop, $3 and bottles of wine.", + "length": 130 + }, + { + "text": "’ Last night the Duke of Cambridge, who is a patron of Lewa, said he was ‘saddened’ to hear about Lt Col Parkinson’s death.", + "length": 131 + }, + { + "text": "Before taking up his place at St Andrews University, Prince William spent his gap year at Lewa, helping with various conservation projects.", + "length": 139 + }, + { + "text": "Known to his fellow soldiers as Parky, he joined the Parachute Regiment and Airborne Forces, before becoming chief instructor at Sandhurst.", + 
"length": 139 + }, + { + "text": "Describing his work in conservation and training in Kenya, she said: 'His activities have resulted in direct improvements to the lives and .", + "length": 140 + }, + { + "text": "Local police chief Maurice Tum said the robbers gained entry by smashing the windows and ordered Lt Col Parkinson to hand over money and guns.", + "length": 142 + }, + { + "text": "' She also described her husband as 'a good man' and  'a loving husband ' and said the men who killed him posed a serious threat to other people.", + "length": 146 + }, + { + "text": "Police said there were parallels with the robbery and a raid on the home of another former British army officer and his wife in the region in June.", + "length": 147 + }, + { + "text": "In 2003, he and his wife permanently relocated to Kenya where he took up his post with Lewa, a growing conservation and community development project.", + "length": 150 + }, + { + "text": "Hacked to death: Lt Col David Parkinson (right) was attacked by five intruders as he was sleeping beside his wife of 35 years, Sonja (left), in Kenya .", + "length": 151 + }, + { + "text": "A local man has been detained and questioned over the attack on the Parkinsons but has not been formally arrested in connection with Sunday’s break-in.", + "length": 153 + }, + { + "text": "'A fight ensured between the men and the soldier and they cut off his left hand and left him bleeding profusely' Maurice Tum, local Kenyan police chief .", + "length": 153 + }, + { + "text": "The wife of a former Parachute Regiment officer who was hacked to death in Kenya has described his killers as ‘evil' and called his murder 'senseless'.", + "length": 153 + }, + { + "text": "Speaking about her husband's death for the first time today, Mrs Parkinson told the Daily Telegraph: 'David was brutally killed in the most senseless way.", + "length": 154 + }, + { + "text": "Last night, Kenyan police said the retired soldier’s hand was cut off as he struggled with 
the intruders in order to give his wife enough time to reach safety.", + "length": 161 + }, + { + "text": "A spokesman for the Foreign Office said: ‘We are aware of the death of a British national in Kenya and our team at the High Commission in Nairobi are providing consular assistance.", + "length": 182 + }, + { + "text": "Nature reserve manager Lt Col David Parkinson, 58, was killed by five robbers at the remote home he shared with his wife Sonja on Sunday morning in Lolldaiga Hills, near Mount Kenya National Park.", + "length": 196 + }, + { + "text": "Tragedy: Lt Col David Parkinson was killed in the farmhouse of the Lolldaiga Hills ranch in Kenya (pictured) He added: ‘A fight ensured between the men and the soldier and they cut off his left hand and left him bleeding profusely.", + "length": 233 + }, + { + "text": "'Saddened': The soldier was formerly a director of Lewa Wildlife Conservancy, where Prince William (right) spent his gap year as a 19-year-old and where he proposed to Kate Middleton (left, with Prince George) A friend and neighbour said: ‘He was very .", + "length": 255 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7709518373012543 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:28.702284782Z", + "first_section_created": "2025-12-23T09:33:28.702654796Z", + "last_section_published": "2025-12-23T09:33:28.703094113Z", + "all_results_received": "2025-12-23T09:33:28.798291756Z", + "output_generated": "2025-12-23T09:33:28.798567367Z", + "total_processing_time_ms": 96, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 95, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:28.702654796Z", + "publish_time": "2025-12-23T09:33:28.702933407Z", + "first_worker_start": "2025-12-23T09:33:28.703494929Z", + "last_worker_end": "2025-12-23T09:33:28.797051Z", + "total_journey_time_ms": 94, + "worker_timings": { + 
"sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:28.703631034Z", + "start_time": "2025-12-23T09:33:28.703696137Z", + "end_time": "2025-12-23T09:33:28.703805741Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:28.70383Z", + "start_time": "2025-12-23T09:33:28.704004Z", + "end_time": "2025-12-23T09:33:28.797051Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 93 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:28.703670536Z", + "start_time": "2025-12-23T09:33:28.703716538Z", + "end_time": "2025-12-23T09:33:28.703826942Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:28.703416526Z", + "start_time": "2025-12-23T09:33:28.703494929Z", + "end_time": "2025-12-23T09:33:28.703551031Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:28.702981509Z", + "publish_time": "2025-12-23T09:33:28.703094113Z", + "first_worker_start": "2025-12-23T09:33:28.703563332Z", + "last_worker_end": "2025-12-23T09:33:28.795228Z", + "total_journey_time_ms": 92, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:28.703688837Z", + "start_time": "2025-12-23T09:33:28.703715138Z", + "end_time": "2025-12-23T09:33:28.703728038Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:28.704097Z", + "start_time": "2025-12-23T09:33:28.704231Z", + "end_time": "2025-12-23T09:33:28.795228Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 90 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:28.703670536Z", + "start_time": "2025-12-23T09:33:28.703710338Z", + "end_time": "2025-12-23T09:33:28.703743239Z", 
+ "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:28.70352233Z", + "start_time": "2025-12-23T09:33:28.703563332Z", + "end_time": "2025-12-23T09:33:28.703576032Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 183, + "min_processing_ms": 90, + "max_processing_ms": 93, + "avg_processing_ms": 91, + "median_processing_ms": 93, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2731, + "slowest_section_id": 0, + "slowest_section_time_ms": 94 + } +} diff --git a/data/output/002ab555011a771d8390288495066e30028292ee.json b/data/output/002ab555011a771d8390288495066e30028292ee.json new file mode 100644 index 0000000..b44a216 --- /dev/null +++ b/data/output/002ab555011a771d8390288495066e30028292ee.json @@ -0,0 +1,406 @@ +{ + "file_name": "002ab555011a771d8390288495066e30028292ee.txt", + "total_words": 583, + "top_n_words": [ + { + "word": "i", + "count": 41 + }, + { + "word": "a", + "count": 24 + }, + { + "word": "the", + 
"count": 18 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "it", + "count": 13 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "m", + "count": 9 + }, + { + "word": "on", + "count": 9 + }, + { + "word": "is", + "count": 8 + }, + { + "word": "s", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "7.", + "length": 2 + }, + { + "text": "4.", + "length": 2 + }, + { + "text": "1.", + "length": 2 + }, + { + "text": "8.", + "length": 2 + }, + { + "text": "5.", + "length": 2 + }, + { + "text": "6.", + "length": 2 + }, + { + "text": "2.", + "length": 2 + }, + { + "text": "9.", + "length": 2 + }, + { + "text": "3.", + "length": 2 + }, + { + "text": "(InStyle.", + "length": 9 + }, + { + "text": "I mix a lot.", + "length": 12 + }, + { + "text": "Smell yummy .", + "length": 13 + }, + { + "text": "I'm OCD like that.", + "length": 18 + }, + { + "text": "Go with the flow .", + "length": 18 + }, + { + "text": "Be a girlie girl .", + "length": 18 + }, + { + "text": "Laugh at yourself .", + "length": 19 + }, + { + "text": "Keep a food diary .", + "length": 19 + }, + { + "text": "E-mail to a friend .", + "length": 20 + }, + { + "text": "All rights reserved.", + "length": 20 + }, + { + "text": "Don't break the bank .", + "length": 22 + }, + { + "text": "Always look the part .", + "length": 22 + }, + { + "text": "Exercise your options .", + "length": 23 + }, + { + "text": "Don't be a dairy queen .", + "length": 24 + }, + { + "text": "But it all comes together.", + "length": 26 + }, + { + "text": "Cheese is ruining my life!", + "length": 26 + }, + { + "text": "Copyright © 2009 Time Inc.", + "length": 27 + }, + { + "text": "I have a couple by Anna Sui.", + "length": 28 + }, + { + "text": "I get a bike and ride around.", + "length": 29 + }, + { + "text": "It's got a little shimmer in it.", + "length": 32 + }, + { + "text": "I've never bought a car in my life.", + "length": 35 + }, + { + "text": "Oh, jeez, I'm thinking about it now...", + "length": 38 + }, + { 
+ "text": "Very rarely do I go out without makeup.", + "length": 39 + }, + { + "text": "If I'm dressing up I might use perfume.", + "length": 39 + }, + { + "text": "\" So I'll put something shimmery on top.", + "length": 40 + }, + { + "text": "And Gwen Stefani's perfume [L], is good too.", + "length": 44 + }, + { + "text": "If it's a nice day, outdoor activity is nice.", + "length": 45 + }, + { + "text": "Watch how Carrie handles romantic distress » .", + "length": 47 + }, + { + "text": "Get a FREE TRIAL issue of InStyle - CLICK HERE!", + "length": 47 + }, + { + "text": "My favorite body lotion is Benefit's Maybe Baby.", + "length": 48 + }, + { + "text": "I tend to go after bigger, flowy tops or dresses.", + "length": 49 + }, + { + "text": "I'll put on lip gloss and go, \"That needs to shimmer.", + "length": 53 + }, + { + "text": "With makeup I think it's important for girls to play.", + "length": 53 + }, + { + "text": "Then I want it a little pinker, so I put pink on top.", + "length": 53 + }, + { + "text": "I also do the treadmill, free weights, exercise ball.", + "length": 53 + }, + { + "text": "I'm trying to talk myself into being lactose-intolerant.", + "length": 56 + }, + { + "text": "Plus, I get a decent cardio workout every night onstage.", + "length": 56 + }, + { + "text": "Usually I come up through the floor to get to the stage.", + "length": 56 + }, + { + "text": "I don't buy expensive jewelry and fur coats and Escalades.", + "length": 58 + }, + { + "text": "Always take an hour for yourself to go do something positive.", + "length": 61 + }, + { + "text": "com) -- Singer Carrie Underwood dishes on dieting, beauty and fashion.", + "length": 70 + }, + { + "text": "I don't do smelly soaps or wear perfume, so lotion is kind of my perfume.", + "length": 73 + }, + { + "text": "Somebody, somewhere, will catch you, even if it's a fan at the grocery store.", + "length": 77 + }, + { + "text": "I'm trying to lay off the black eyeliner, though--I love it a little too 
much.", + "length": 78 + }, + { + "text": "I mean, it's basically moldy milk, and it doesn't smell that great--but it's so good!", + "length": 85 + }, + { + "text": "Carrie Underwood in a Michael Kors wool crêpe dress and Kenneth Jay Lane link bracelet.", + "length": 88 + }, + { + "text": "Most of the time I slap on some makeup in the morning so I don't look bad if somebody snaps a picture.", + "length": 102 + }, + { + "text": "The car I have, a Ford Mustang convertible, is the one I won on Idol--I guess I need to go buy a new car after all!", + "length": 115 + }, + { + "text": "I like the elliptical machine or swimming if I'm in a hotel with a decent pool and there aren't too many people there.", + "length": 118 + }, + { + "text": "I count calories, fat and fiber--which is important in making you feel fuller faster--and protein, especially when I'm working out.", + "length": 131 + }, + { + "text": "I can't stand up on the little platform or the audience will see me, so I'm squatting down with my rear end pressed up against the back of it.", + "length": 142 + }, + { + "text": "I just laugh because the audience thinks this is so glamorous--and here I am under the stage with these ropes and equipment, my butt pressed against plastic!", + "length": 157 + }, + { + "text": "I know I'm doing myself a disservice, but I'd rather put on a muumuu and have people at least think there might be a skinny person underneath than put on something tight and have them think, Look at that gut!", + "length": 208 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4392245411872864 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:29.203982607Z", + "first_section_created": "2025-12-23T09:33:29.205510167Z", + "last_section_published": "2025-12-23T09:33:29.205700875Z", + "all_results_received": "2025-12-23T09:33:29.274429077Z", + "output_generated": "2025-12-23T09:33:29.274627085Z", + "total_processing_time_ms": 70, + 
"file_splitting_time_ms": 1, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:29.205510167Z", + "publish_time": "2025-12-23T09:33:29.205700875Z", + "first_worker_start": "2025-12-23T09:33:29.206263497Z", + "last_worker_end": "2025-12-23T09:33:29.27344Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:29.206219995Z", + "start_time": "2025-12-23T09:33:29.206286598Z", + "end_time": "2025-12-23T09:33:29.2063527Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:29.206498Z", + "start_time": "2025-12-23T09:33:29.206627Z", + "end_time": "2025-12-23T09:33:29.27344Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:29.206192794Z", + "start_time": "2025-12-23T09:33:29.206263497Z", + "end_time": "2025-12-23T09:33:29.206368801Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:29.206191794Z", + "start_time": "2025-12-23T09:33:29.206269197Z", + "end_time": "2025-12-23T09:33:29.206309799Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + 
"worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2917, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/002af01994793e515bd099fc3c8ec53bb76d5518.json b/data/output/002af01994793e515bd099fc3c8ec53bb76d5518.json new file mode 100644 index 0000000..b9cce57 --- /dev/null +++ b/data/output/002af01994793e515bd099fc3c8ec53bb76d5518.json @@ -0,0 +1,282 @@ +{ + "file_name": "002af01994793e515bd099fc3c8ec53bb76d5518.txt", + "total_words": 494, + "top_n_words": [ + { + "word": "he", + "count": 20 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "to", + "count": 14 + }, + { + "word": "the", + "count": 13 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "crampton", + "count": 7 + }, + { + "word": "leicester", + "count": 7 + }, + { + "word": "that", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Alex Ward .", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "11:04 EST, 2 October 2012 .", + "length": 27 + }, + { + "text": "02:02 EST, 3 October 2012 .", + "length": 27 + }, + { + "text": "‘If he is only 19 now he must have been .", + "length": 43 + }, + { + "text": "This man was very young when he began and has been prolific.", + "length": 60 + }, + { + "text": "The 
60-year-old said: ‘It did cause great upset at the time.", + "length": 62 + }, + { + "text": "Adam Crampton claims to have burgled 278 homes and stolen 60 cars .", + "length": 67 + }, + { + "text": "also that the police have never given up even after all these years.", + "length": 68 + }, + { + "text": "’ Crampton will be sentenced at Leicester Crown Court on October 23.", + "length": 70 + }, + { + "text": "I just hope he doesn’t, now that he has admitted all of these crimes.", + "length": 71 + }, + { + "text": "He will be sentenced at Leicester Crown Court (pictured) on October 23 .", + "length": 72 + }, + { + "text": "very young then, only a child but it’s comforting to know who did it and .", + "length": 76 + }, + { + "text": "He even took officers on tours of Leicester four times, pointing out homes he had targeted.", + "length": 91 + }, + { + "text": "‘You hear that a lot of people in prison go back to their old ways when they are released.", + "length": 92 + }, + { + "text": "‘He was visited by police officers and he indicated he wanted to clear up all of his outstanding crimes.", + "length": 106 + }, + { + "text": "’ Sylvia Hammond, one of his earliest victims, told of her relief at discovering the identity of her intruder.", + "length": 112 + }, + { + "text": "Crampton stole a laptop, a phone and a digital camera from Mrs Hammond’s home in Oakham, Rutland, in July 2007.", + "length": 113 + }, + { + "text": "A teenager wants to ‘clear his slate’ after allegedly burgling 278 homes and stealing 60 cars since he was 12.", + "length": 114 + }, + { + "text": "Steve Morris, defending, said: ‘He has volunteered the information to get things cleared up and he wants to do this as soon as possible.", + "length": 138 + }, + { + "text": "Adam Crampton, 19, has been a ‘prolific’ one man crimewave, especially in the Leicester area where up to 200 of the burglaries took place.", + "length": 142 + }, + { + "text": "’ Prosecutor Ian Johnson told the court: ‘He was sentenced 
to four years in March this year for offences of robbery and dwelling house burglary.", + "length": 148 + }, + { + "text": "Clear up crimes: Prosecutor Ian Johnson told Leicester Magistrates' Court that Crampton 'indicated he wanted to clear up all of his outstanding crimes'.", + "length": 152 + }, + { + "text": "‘One of the aims of the criminal justice system is to help people turn their lives around and hopefully this is the beginning of that process for him.", + "length": 152 + }, + { + "text": "Inspector Andy Parkes, of Leicester Police said: ‘Victims want to know that someone has been caught and held responsible for crimes committed against them.", + "length": 157 + }, + { + "text": "’ Confessed to crimes: Crampton revealed the full extent of his criminal history in interviews with police, even taking them on tours, pointing out homes he targeted .", + "length": 169 + }, + { + "text": "He pleaded guilty to three specimen counts of burglary at Leicester Magistrates’ Court recently but has asked that a further 278 break-ins and 60 car thefts be taken into consideration when he is sentenced later this month.", + "length": 225 + }, + { + "text": "Crampton, of no fixed address, revealed the full extent of his criminal history in interviews with police at a young offenders’ institution earlier this year as he began a four-year sentence for robbery and burglary offences in March.", + "length": 236 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6539815664291382 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:29.706483664Z", + "first_section_created": "2025-12-23T09:33:29.707785815Z", + "last_section_published": "2025-12-23T09:33:29.708045526Z", + "all_results_received": "2025-12-23T09:33:29.780592178Z", + "output_generated": "2025-12-23T09:33:29.780771285Z", + "total_processing_time_ms": 74, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 72, + "aggregation_wait_time_ms": 0, + 
"section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:29.707785815Z", + "publish_time": "2025-12-23T09:33:29.708045526Z", + "first_worker_start": "2025-12-23T09:33:29.708605948Z", + "last_worker_end": "2025-12-23T09:33:29.779401Z", + "total_journey_time_ms": 71, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:29.708694051Z", + "start_time": "2025-12-23T09:33:29.708748953Z", + "end_time": "2025-12-23T09:33:29.708816656Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:29.70884Z", + "start_time": "2025-12-23T09:33:29.708988Z", + "end_time": "2025-12-23T09:33:29.779401Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 70 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:29.708621448Z", + "start_time": "2025-12-23T09:33:29.708692051Z", + "end_time": "2025-12-23T09:33:29.708762754Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:29.708529345Z", + "start_time": "2025-12-23T09:33:29.708605948Z", + "end_time": "2025-12-23T09:33:29.708640649Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 70, + "min_processing_ms": 70, + "max_processing_ms": 70, + "avg_processing_ms": 70, + "median_processing_ms": 70, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 
0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2856, + "slowest_section_id": 0, + "slowest_section_time_ms": 71 + } +} diff --git a/data/output/002b4899d3d09c6d67904a3115d68004c8e96ab9.json b/data/output/002b4899d3d09c6d67904a3115d68004c8e96ab9.json new file mode 100644 index 0000000..4563137 --- /dev/null +++ b/data/output/002b4899d3d09c6d67904a3115d68004c8e96ab9.json @@ -0,0 +1,246 @@ +{ + "file_name": "002b4899d3d09c6d67904a3115d68004c8e96ab9.txt", + "total_words": 482, + "top_n_words": [ + { + "word": "the", + "count": 28 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "s", + "count": 13 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "to", + "count": 9 + }, + { + "word": "his", + "count": 8 + }, + { + "word": "hayward", + "count": 7 + }, + { + "word": "on", + "count": 7 + }, + { + "word": "year", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "8 million – worth about £600,000 a year.", + "length": 45 + }, + { + "text": "Hated: Tony Hayward at a Washington inquiry with his wife (circled).", + "length": 68 + }, + { + "text": "The marriage will officially be dissolved with  a decree absolute in six weeks.", + "length": 80 + }, + { + "text": "‘It’s not something she’d talk about –  they are both very private people.", + "length": 83 + }, + { + "text": "Mr Hayward’s departure from BP was reportedly eased by a pension pot valued at £10.", + "length": 86 + }, + { + "text": "But now former BP boss Tony Hayward’s partner has called time on their 27-year marriage.", + "length": 90 + }, + { + 
"text": "He even found himself on the wrong side of vitriolic attacks led by US President Barack Obama.", + "length": 94 + }, + { + "text": "She put her career on hold to bring up the  couple’s two children, Kieran, 22, and Tara, 18.", + "length": 95 + }, + { + "text": "The Haywards married in Edinburgh in 1985, when the couple were both working in junior  positions at BP.", + "length": 105 + }, + { + "text": "’ Mr Hayward did not contest his wife’s application for a decree nisi, which was granted in  50 seconds.", + "length": 109 + }, + { + "text": "She was the fiercely loyal wife, standing by a husband branded America’s ‘most hated and most clueless man’.", + "length": 114 + }, + { + "text": "It prompted Ms Fulton, a geophysicist by training, to start writing a book that year to defend her beleaguered husband.", + "length": 119 + }, + { + "text": "Maureen Fulton has been granted a ‘quickie’ divorce, citing the 55-year-old oil chief’s ‘unreasonable behaviour’.", + "length": 123 + }, + { + "text": "The businessman’s acceptance of the grounds for the divorce meant that no details were given  in court about his behaviour.", + "length": 126 + }, + { + "text": "He later took up the post of chief executive  of Mayfair-based Genel Energy, which is developing oil production in Kurdistan.", + "length": 126 + }, + { + "text": "My Hayward took up the post of chief executive of Mayfair-based Genel Energy, which is developing oil production in Kurdistan .", + "length": 127 + }, + { + "text": "The 51-year-old refused to comment on her  reasons for seeking a divorce when asked at the family’s house near Sevenoaks, Kent.", + "length": 130 + }, + { + "text": "When approached for a comment on his  marriage split, a spokeswoman for Genel Energy said that Mr Hayward would not be making any statement on the matter.", + "length": 155 + }, + { + "text": "His remark that ‘I would like my life back’ and pictures of him relaxing on his yacht at the peak of the uproar fuelled the campaign 
to oust him from his post.", + "length": 163 + }, + { + "text": "The decree nisi came only months after Ms Fulton was reported to be leading the fightback  to restore her husband’s tattered reputation following the 2010 Deepwater Horizon rig disaster.", + "length": 189 + }, + { + "text": "Maureen Fulton has een granted a divorce citing the 55-year-old's 'unreasonable behaviour' The oil spill in the Gulf of Mexico – the worst  in history – forced Mr Hayward out of his £4 million-a-year job as BP’s chief executive.", + "length": 238 + }, + { + "text": "Disaster: Maureen Fulton was reported to be leading the fightback to restore her husband's tattered reputation following the Deepwater Horizon oil rig catastrophe (pictured) ‘I’d heard in the past of rumours that they were having problems but I haven’t heard about anyone else being involved,’ said one of her friends yesterday.", + "length": 336 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8235841989517212 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:30.208800514Z", + "first_section_created": "2025-12-23T09:33:30.210028062Z", + "last_section_published": "2025-12-23T09:33:30.210201469Z", + "all_results_received": "2025-12-23T09:33:30.276807888Z", + "output_generated": "2025-12-23T09:33:30.276942193Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:30.210028062Z", + "publish_time": "2025-12-23T09:33:30.210201469Z", + "first_worker_start": "2025-12-23T09:33:30.210661287Z", + "last_worker_end": "2025-12-23T09:33:30.275881Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:30.210660087Z", + "start_time": "2025-12-23T09:33:30.21073219Z", + "end_time": "2025-12-23T09:33:30.210787692Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:30.210815Z", + "start_time": "2025-12-23T09:33:30.210985Z", + "end_time": "2025-12-23T09:33:30.275881Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:30.210614985Z", + "start_time": "2025-12-23T09:33:30.210678688Z", + "end_time": "2025-12-23T09:33:30.210791492Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:30.210598885Z", + "start_time": "2025-12-23T09:33:30.210661287Z", + "end_time": "2025-12-23T09:33:30.210689988Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2820, + "slowest_section_id": 0, + 
"slowest_section_time_ms": 65 + } +} diff --git a/data/output/002b54553ecad779ce33107fe42af77537c6cbfd.json b/data/output/002b54553ecad779ce33107fe42af77537c6cbfd.json new file mode 100644 index 0000000..0358b10 --- /dev/null +++ b/data/output/002b54553ecad779ce33107fe42af77537c6cbfd.json @@ -0,0 +1,250 @@ +{ + "file_name": "002b54553ecad779ce33107fe42af77537c6cbfd.txt", + "total_words": 455, + "top_n_words": [ + { + "word": "the", + "count": 25 + }, + { + "word": "a", + "count": 16 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "home", + "count": 9 + }, + { + "word": "an", + "count": 8 + }, + { + "word": "for", + "count": 8 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "located", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "1 metres frontage of a picturesque setting.", + "length": 43 + }, + { + "text": "1 metres frontage of a picturesque setting .", + "length": 44 + }, + { + "text": "2 million dollars and had just the one owner since 1981.", + "length": 56 + }, + { + "text": "Sprawled across 296 square metre block of land, it has an estimated 10.", + "length": 71 + }, + { + "text": "Sprawled across 296 square metre block of land, it has an estimated 10.", + "length": 71 + }, + { + "text": "The single story home was listed on the market, with an asking price of $3.", + "length": 75 + }, + { + "text": "The well-appointed kitchen features polishing sealed wooden cupboards, benchtops, dishwasher and an oven .", + "length": 106 + }, + { + "text": "Located in Melbourne’s most affluent suburb of St Kilda, the humble home is located opposite Catani Gardens .", + "length": 112 + }, + { + "text": "Featuring an ensuite of amenities, the two bedroom home features two bathrooms, laundry room and a double garage .", + "length": 115 + }, + { + "text": "The humble home at 356 Beaconsfield Parade in 
Victoria features a spacious living room, perfect for starting a family .", + "length": 119 + }, + { + "text": "Located in Melbourne’s most affluent suburb of St Kilda, the waterfront strip is set on a prized corner of Mary Street .", + "length": 122 + }, + { + "text": "With property prices skyrocketing and space at a premium, the worst house on the best street has never been more valuable.", + "length": 122 + }, + { + "text": "Located in Melbourne’s most affluent suburb of St Kilda, the waterfront strip is set on a prized corner of Mary Street .", + "length": 122 + }, + { + "text": "The single story home was listed on the market for the very first time in 33 years and has had just the one owner since 1981 .", + "length": 126 + }, + { + "text": "Featuring an ensuite of amenities, the home delivers an outstanding beachfront allotment and a walking distance to vibrant Fitzroy Street.", + "length": 138 + }, + { + "text": "Located in Melbourne’s most affluent suburb of St Kilda, the waterfront strip is set on a prized corner of Mary Street and opposite Catani Gardens.", + "length": 149 + }, + { + "text": "With secure access to the three car garage with a concealed cocktail bar and restaurant-standard kitchen, it provides ease of entertaining for the whole family.", + "length": 160 + }, + { + "text": "Located at 261 Beaconsfield Parade in Middle Park, the recently renovated and designed home features four bedrooms, open living and dining room and four bathrooms.", + "length": 163 + }, + { + "text": "The humble home at 356 Beaconsfield Parade in Victoria features two bedrooms, a spacious living room, dining area, well-appointed kitchen, two bathrooms and a laundry room.", + "length": 172 + }, + { + "text": "While the worst house has arisen on one of Melbourne’s finest streets, the three-storey modern mansion located just three minutes away is set to sell for more than $10 million dollars.", + "length": 186 + }, + { + "text": "It sets a benchmark for contemporary bayside 
living, with stainless steel kitchen and floor-to-ceiling glass doors that open to the poolside travertine terrace with stainless steel kitchen and BBQ.", + "length": 197 + }, + { + "text": "With excellent access to double garage and private courtyard, experts believe the property is a perfect opportunity for new home owners, renovators or developers to upgrade for a tremendous scope to capitalise or even an extensive overhaul.", + "length": 240 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.36096516251564026 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:30.710986081Z", + "first_section_created": "2025-12-23T09:33:30.712445644Z", + "last_section_published": "2025-12-23T09:33:30.71260475Z", + "all_results_received": "2025-12-23T09:33:30.774756714Z", + "output_generated": "2025-12-23T09:33:30.774906521Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:30.712445644Z", + "publish_time": "2025-12-23T09:33:30.71260475Z", + "first_worker_start": "2025-12-23T09:33:30.713104272Z", + "last_worker_end": "2025-12-23T09:33:30.773895Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:30.713156474Z", + "start_time": "2025-12-23T09:33:30.713218977Z", + "end_time": "2025-12-23T09:33:30.71329178Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:30.713308Z", + "start_time": "2025-12-23T09:33:30.713464Z", + "end_time": "2025-12-23T09:33:30.773895Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:30.713132273Z", + "start_time": "2025-12-23T09:33:30.713204176Z", + "end_time": 
"2025-12-23T09:33:30.71329858Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:30.713042369Z", + "start_time": "2025-12-23T09:33:30.713104272Z", + "end_time": "2025-12-23T09:33:30.713129473Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2751, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/002b7e95c33fdedcd8c9ad70365df18517266098.json b/data/output/002b7e95c33fdedcd8c9ad70365df18517266098.json new file mode 100644 index 0000000..4d073fd --- /dev/null +++ b/data/output/002b7e95c33fdedcd8c9ad70365df18517266098.json @@ -0,0 +1,234 @@ +{ + "file_name": "002b7e95c33fdedcd8c9ad70365df18517266098.txt", + "total_words": 408, + "top_n_words": [ + { + "word": "the", + "count": 24 + }, + { + "word": "a", + 
"count": 20 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "she", + "count": 9 + }, + { + "word": "is", + "count": 8 + }, + { + "word": "has", + "count": 6 + }, + { + "word": "was", + "count": 6 + }, + { + "word": "for", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "The UK's Public Health Agency warns that 'breathing in helium...", + "length": 64 + }, + { + "text": "She fell into a coma after inhaling helium gas, which can prove fatal .", + "length": 71 + }, + { + "text": "Police are investigating the incident and the television station involved has apologised .", + "length": 90 + }, + { + "text": "' Despite the young girl's public status, her name has not been revealed because she is a minor.", + "length": 96 + }, + { + "text": "TV Asahi has apologised for the incident and said an internal probe into the case was under way.", + "length": 96 + }, + { + "text": "The canister from which she inhaled the gas was marked for adult use only, the channel admitted.", + "length": 96 + }, + { + "text": "3B Junior: Despite the girl's public status, her name has not been revealed because she is a minor.", + "length": 99 + }, + { + "text": "deprives vital organs of essential oxygen' and can can cause 'dizziness, headache and suffocation'.", + "length": 99 + }, + { + "text": "In 2012, a popular comedian broke his back when he jumped into a swimming pool as part of a programme.", + "length": 102 + }, + { + "text": "Inhaling helium is a common party trick, but can prove fatal and deaths have previously been recorded.", + "length": 102 + }, + { + "text": "Local media said police had also launched an investigation on suspicion of professional negligence resulting in injury.", + "length": 119 + }, + { + "text": "A 12-year-old member of a Japanese pop group has been in a coma for more than a week after inhaling helium during a television show stunt.", + "length": 138 + }, + { + 
"text": "She was inhaling helium from a canister as part of a game that involved changing her voice, when she fell unconscious and has yet to recover.", + "length": 141 + }, + { + "text": "The unnamed girl is a member of the 3B Junior group, a collective of 25 singers aged between 10 and 16 that are hugely popular in the country.", + "length": 142 + }, + { + "text": "Popular: The unnamed 12-year-old is part of the 3B Junior group, a collective of singers aged between 10 and 16 who are hugely popular in Japan.", + "length": 144 + }, + { + "text": "She was immediately rushed to a Tokyo hospital, where local media reported she was found to have suffered an embolism in the blood supply to her brain.", + "length": 151 + }, + { + "text": "A spokesman for TV Asahi revealed the girl was one of five youngsters taking part in the recording of a variety show on January 28 when the incident happened.", + "length": 158 + }, + { + "text": "The girl is believed to now be showing signs of activity, including the moving of her eyes and limb, and the spokesman added: 'There are signs of recovery, but she is still not fully fit.", + "length": 189 + }, + { + "text": "Japanese TV is internationally renowned for the outrageous nature of their game shows, which often feature members of the public and celebrities compting in shows that involve dangerous or humiliating tasks.", + "length": 207 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5767571330070496 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:31.213429717Z", + "first_section_created": "2025-12-23T09:33:31.213838834Z", + "last_section_published": "2025-12-23T09:33:31.214048543Z", + "all_results_received": "2025-12-23T09:33:31.279633954Z", + "output_generated": "2025-12-23T09:33:31.279789061Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + 
"section_id": 0, + "creation_time": "2025-12-23T09:33:31.213838834Z", + "publish_time": "2025-12-23T09:33:31.214048543Z", + "first_worker_start": "2025-12-23T09:33:31.21468047Z", + "last_worker_end": "2025-12-23T09:33:31.278773Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:31.214657269Z", + "start_time": "2025-12-23T09:33:31.214716672Z", + "end_time": "2025-12-23T09:33:31.214771974Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:31.214889Z", + "start_time": "2025-12-23T09:33:31.215031Z", + "end_time": "2025-12-23T09:33:31.278773Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:31.21466527Z", + "start_time": "2025-12-23T09:33:31.214724372Z", + "end_time": "2025-12-23T09:33:31.214796375Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:31.214617468Z", + "start_time": "2025-12-23T09:33:31.21468047Z", + "end_time": "2025-12-23T09:33:31.214701471Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, 
+ "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2320, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/002b864bf68d00d74d0cad76be4cbc049f7321ca.json b/data/output/002b864bf68d00d74d0cad76be4cbc049f7321ca.json new file mode 100644 index 0000000..15da0b5 --- /dev/null +++ b/data/output/002b864bf68d00d74d0cad76be4cbc049f7321ca.json @@ -0,0 +1,436 @@ +{ + "file_name": "002b864bf68d00d74d0cad76be4cbc049f7321ca.txt", + "total_words": 1198, + "top_n_words": [ + { + "word": "the", + "count": 70 + }, + { + "word": "to", + "count": 37 + }, + { + "word": "a", + "count": 29 + }, + { + "word": "and", + "count": 26 + }, + { + "word": "of", + "count": 25 + }, + { + "word": "in", + "count": 24 + }, + { + "word": "was", + "count": 23 + }, + { + "word": "he", + "count": 22 + }, + { + "word": "monis", + "count": 17 + }, + { + "word": "his", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "m.", + "length": 2 + }, + { + "text": "Crisis in a cafe .", + "length": 18 + }, + { + "text": "Who were the victims?", + "length": 21 + }, + { + "text": "An extremist theology .", + "length": 23 + }, + { + "text": "The result of their airstrikes.", + "length": 31 + }, + { + "text": "\"We need to understand why he was.", + "length": 34 + }, + { + "text": "Accessory to murder, sex charges .", + "length": 34 + }, + { + "text": "\" What we know, what we don't know .", + "length": 36 + }, + { + "text": "Two hostages were killed during the standoff.", + "length": 45 + }, + { + "text": "Monday's hostage situation began around 10 a.", + "length": 45 + }, + { + "text": "of America 
and its allies including Australia.", + "length": 46 + }, + { + "text": "We also need to understand why he wasn't picked up.", + "length": 51 + }, + { + "text": "\" Police have refused to comment on that accusation.", + "length": 52 + }, + { + "text": "The more you fight with crime, the more peaceful you are.", + "length": 57 + }, + { + "text": "\" The man holding the hostages demanded to speak to Abbott.", + "length": 59 + }, + { + "text": "\"But I don't believe he was on a terror watch list at this time.", + "length": 64 + }, + { + "text": "\" There's a graphic photo of slain children at the top of the site.", + "length": 67 + }, + { + "text": "\"We're all outraged that this guy was on the street,\" he told reporters.", + "length": 72 + }, + { + "text": "But the criminal accusations against him began even before he came to Australia.", + "length": 80 + }, + { + "text": "\" He had left behind a wife and two children, who he believed Monis had not seen since.", + "length": 87 + }, + { + "text": "Throughout the 2000s, said Conditsis, Monis \"became sympathetic to what he perceived ...", + "length": 88 + }, + { + "text": "Police later announced that the siege was over and that the lone gunman had been killed.", + "length": 88 + }, + { + "text": "Gunfire erupted early Tuesday as police stormed the cafe where the gunman had been holding hostages.", + "length": 100 + }, + { + "text": "He used the Internet to spread extremist beliefs, garnering nearly 13,000 likes on his Facebook page.", + "length": 101 + }, + { + "text": "Australian media captured haunting images of hostages pressing their hands against the cafe's windows.", + "length": 102 + }, + { + "text": "was the victimization of Muslims and Islamists around the world, and partly at least took up that cause.", + "length": 104 + }, + { + "text": "That made the hostage-taker furious, reported Chris Reason, a correspondent for CNN affiliate Seven Network.", + "length": 108 + }, + { + "text": "Hours into the crisis, at 
least five hostages managed to escape, running terrified toward police in riot gear.", + "length": 110 + }, + { + "text": "Police were monitoring social media because hostages appeared to be posting information about the man's demands.", + "length": 112 + }, + { + "text": "\"They should have put him away and thrown away the key,\" the dead woman's godfather, Ayyut Khalik, told NBC News.", + "length": 113 + }, + { + "text": "\" Conditsis told Australian public broadcaster ABC that Monis was an isolated figure who was probably acting alone.", + "length": 116 + }, + { + "text": "He said Monis used to beat Pal, forcing her to wear a hijab all the time and forbidding her from talking to \"outsiders.", + "length": 119 + }, + { + "text": "Monis fled his homeland in 1995 while being sought for allegedly committing fraud, Iran's semi-official Fars News reported.", + "length": 123 + }, + { + "text": "On his website, which has now been taken down, there was a pledge of allegiance to the so-called Islamic State terror group.", + "length": 124 + }, + { + "text": "Monis had been granted political asylum in 2001 and had had no further contact with his birth country, the spokesperson said.", + "length": 125 + }, + { + "text": "\" His broader cause, he said, was lobbying governments around the world, particularly Australia, not to wage wars on Muslim soil.", + "length": 129 + }, + { + "text": "Hundreds of police officers, including snipers, took position around the Lindt Chocolate Cafe in Sydney's central business district.", + "length": 132 + }, + { + "text": "During the siege, Abbott said, the hostage-taker \"sought to cloak his actions with the symbolism of the (ISIS) death cult,\" Abbott said.", + "length": 136 + }, + { + "text": "A spokesperson at Iran's embassy in Canberra told CNN that Tehran had officially requested Monis's extradition but nothing had come of it.", + "length": 138 + }, + { + "text": "The letters were \"sadistic, wantonly cruel and deeply wounding,\" one 
High Court judge said at the time, according to CNN affiliate Seven News.", + "length": 142 + }, + { + "text": "Noleen Hayson Pal was found dead with multiple stab wounds in a stairwell, and her body had been set on fire, The Sydney Morning Herald reported.", + "length": 145 + }, + { + "text": "\" Court documents show Monis was also facing 45 sex-related charges, including sexual intercourse without consent and aggravated indecent assault.", + "length": 146 + }, + { + "text": "They were reportedly taking turns holding a black flag with Arabic writing on it that said, \"There is no God but God and Mohammed is the prophet of God.", + "length": 152 + }, + { + "text": "Reason said he could see the gunman become \"extremely agitated\" when he realized what had happened, and he \"started screaming orders\" at the remaining hostages.", + "length": 160 + }, + { + "text": "\" New South Wales Premier Mike Baird said authorities were investigating why Monis -- who was killed in the siege -- was at large, given his criminal background.", + "length": 161 + }, + { + "text": "Monis was using the name Mohammad Hassan Manteghi -- his birth name, according to Iran's state news agency IRNA -- and claimed to be a \"healer,\" according to the report.", + "length": 169 + }, + { + "text": "The gunman who held hostages for more than 16 hours in a Sydney cafe was no stranger to police -- and was on bail for violent criminal offenses at the time of the siege.", + "length": 169 + }, + { + "text": "\" While older footage of Monis preaching shows him dressed in typical Shiite cleric's attire, in his social media posts, he appears to embrace a radical Sunni extremist theology.", + "length": 178 + }, + { + "text": "A YouTube video posted in November shows Monis standing on a street corner, chains draped over him, carrying a sign that says, \"I have been tortured in prison for my political letters.", + "length": 184 + }, + { + "text": "His last tweet linked to his website, with a haunting message 
posted the day of his attack on the Sydney cafe: \"If we stay silent towards the criminals we cannot have a peaceful society.", + "length": 186 + }, + { + "text": "Monis also pleaded guilty last year to writing offensive \"poison pen\" letters to the families of Australian soldiers who died in Afghanistan, and was sentenced to 300 hours of community service.", + "length": 194 + }, + { + "text": "\" A description on the site portrayed Monis as a victim of a political vendetta and compares him to Julian Assange, the WikiLeaks founder who has claimed the sex crime allegations he faces are politically motivated.", + "length": 215 + }, + { + "text": "The site describes Monis as a Muslim cleric and activist based in Sydney who has \"continuously been under attack \u0026 false accusation by the Australian government \u0026 media since he started his political letter campaign from 2007.", + "length": 226 + }, + { + "text": "\"He was so blinded by that objective that it would seem he had lost sight of objectivity and rationality and acted in extreme ways,\" he said, describing his former client as \"intensely conflicted and contradicted and inconsistent.", + "length": 230 + }, + { + "text": "According to the Sydney Morning Herald, the initial charges, laid in May 2014, related to an alleged sexual assault on a woman in western Sydney in 2002, before other sex-related charges were added regarding six additional victims.", + "length": 231 + }, + { + "text": "Man Haron Monis, an Iranian-born refugee who was granted political asylum in Australia in 2001, had \"a long history of violent crime, infatuation with extremism and mental instability,\" Australian Prime Minister Tony Abbott told reporters.", + "length": 239 + }, + { + "text": "The self-styled Muslim cleric, also known as Sheikh Haron, was facing dozens of charges at the time of the siege, including two counts of being an accessory to the murder of his ex-wife, according to the Attorney General of New South Wales.", + 
"length": 240 + }, + { + "text": "\"It's pretty obvious that the perpetrator was a deeply disturbed individual,\" he said at a press conference Tuesday, adding that the 50-year-old was \"well known\" to federal and state police, as well as the Australian Security Intelligence Organization.", + "length": 252 + }, + { + "text": "Manny Conditsis, a lawyer who acted for Monis in relation to the accessory to murder and letter-writing charges, told CNN his former client had been a cleric in Shiite Iran, but had become critical of the Islamic Republic's government in the late 1990s, and fled to Australia \"because he was going to be killed.", + "length": 311 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8524045646190643 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:31.714837508Z", + "first_section_created": "2025-12-23T09:33:31.716475778Z", + "last_section_published": "2025-12-23T09:33:31.716926598Z", + "all_results_received": "2025-12-23T09:33:31.809884982Z", + "output_generated": "2025-12-23T09:33:31.810117192Z", + "total_processing_time_ms": 95, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 92, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:31.716475778Z", + "publish_time": "2025-12-23T09:33:31.716730589Z", + "first_worker_start": "2025-12-23T09:33:31.717165708Z", + "last_worker_end": "2025-12-23T09:33:31.808985Z", + "total_journey_time_ms": 92, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:31.717316014Z", + "start_time": "2025-12-23T09:33:31.717413719Z", + "end_time": "2025-12-23T09:33:31.717517923Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:31.717499Z", + "start_time": "2025-12-23T09:33:31.717623Z", + "end_time": "2025-12-23T09:33:31.808985Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 91 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:31.717157208Z", + "start_time": "2025-12-23T09:33:31.717236711Z", + "end_time": "2025-12-23T09:33:31.717353016Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:31.717065604Z", + "start_time": "2025-12-23T09:33:31.717165708Z", + "end_time": "2025-12-23T09:33:31.71722281Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:31.716798792Z", + "publish_time": "2025-12-23T09:33:31.716926598Z", + "first_worker_start": "2025-12-23T09:33:31.717324815Z", + "last_worker_end": "2025-12-23T09:33:31.790651Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:31.717786335Z", + "start_time": "2025-12-23T09:33:31.717843637Z", + "end_time": "2025-12-23T09:33:31.717882339Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:31.7176Z", + "start_time": "2025-12-23T09:33:31.717746Z", + "end_time": "2025-12-23T09:33:31.790651Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:31.717310514Z", + "start_time": "2025-12-23T09:33:31.717352516Z", + "end_time": "2025-12-23T09:33:31.717409018Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:31.717284913Z", + "start_time": "2025-12-23T09:33:31.717324815Z", + "end_time": "2025-12-23T09:33:31.717350316Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 163, + "min_processing_ms": 72, + "max_processing_ms": 91, + "avg_processing_ms": 81, + "median_processing_ms": 91, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3567, + "slowest_section_id": 0, + "slowest_section_time_ms": 92 + } +} diff --git a/data/output/002bfa005d3c4e7ff9fa38fc398ecd24d0046a55.json b/data/output/002bfa005d3c4e7ff9fa38fc398ecd24d0046a55.json new file mode 100644 index 0000000..7032b01 --- /dev/null +++ b/data/output/002bfa005d3c4e7ff9fa38fc398ecd24d0046a55.json @@ -0,0 +1,314 @@ +{ + "file_name": "002bfa005d3c4e7ff9fa38fc398ecd24d0046a55.txt", + "total_words": 767, + "top_n_words": [ + { + "word": "the", + "count": 35 + }, + { + "word": "was", + "count": 34 + }, + { + "word": "in", + "count": 29 + }, + { + "word": "a", + "count": 23 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "she", + "count": 17 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "fugate", + "count": 13 + }, + { + "word": "her", + "count": 12 + }, + { + "word": "of", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Her plea however was rejected .", + "length": 
31 + }, + { + "text": "He was given the death penalty .", + "length": 32 + }, + { + "text": "She now lives in Hillsdale, Michigan.", + "length": 37 + }, + { + "text": "' However the request was turned down.", + "length": 38 + }, + { + "text": "His body was later discovered on a country road.", + "length": 48 + }, + { + "text": "'His lies convicted Caril,' Berry told the paper.", + "length": 49 + }, + { + "text": "But he changed his story just before they went to trial.", + "length": 56 + }, + { + "text": "She was sentenced to life but was released after 17 years.", + "length": 58 + }, + { + "text": "This is so he can determine where voters stand on the request.", + "length": 62 + }, + { + "text": "Fugate's stepson states she suffered a series of strokes in her late 60s.", + "length": 73 + }, + { + "text": "The 1973 film Badlands starring Martin Sheen was based on the killing spree.", + "length": 76 + }, + { + "text": "In 1996 she asked for a pardon on the grounds the sentence 'did not fit the crime'.", + "length": 83 + }, + { + "text": "One of the main factors in her conviction was that she was holding a shotgun at the time.", + "length": 89 + }, + { + "text": "Attempts: Fugate (pictured in 1958) was released in 1976 after serving 17 years in prison.", + "length": 90 + }, + { + "text": "Starkweather ultimately testified that Fugate was a willing participant in the murder spree.", + "length": 92 + }, + { + "text": "She was initially sentenced to life, but the term was later reduced to between 30 and 50 years.", + "length": 95 + }, + { + "text": "'We are convinced not only that she was treated illegally and unjustly, but was, in fact, innocent.", + "length": 99 + }, + { + "text": "High school dropout Starkweather was sentenced to death and executed for his crimes in September 1959.", + "length": 102 + }, + { + "text": "In August last year, she was critically injured in a car crash in Stryker, Ohio, that killed her husband.", + "length": 105 + }, + { + "text": "Fugate 
admitted taking $4 from Jensen's wallet just before he and a friend, Carol King, 16, were shot to death.", + "length": 111 + }, + { + "text": "The killings began with the death of 21-year-old Robert Colvert, who was robbed, abducted and shot in late 1957.", + "length": 112 + }, + { + "text": "Starkweather's murders are the most notorious in Nebraska's history and have been depicted in films and TV programs.", + "length": 116 + }, + { + "text": "In 2007, Fugate married Fredrick Clair, a machinist who also worked as a weather observer for the National Weather Service.", + "length": 123 + }, + { + "text": "Their two-year-old daughter, Betty Jean, had been clubbed to death with the butt of a gun and her body stuffed in a cardboard box.", + "length": 130 + }, + { + "text": "When Fugate was released, she is believed to have moved to Lansing, Michigan, where she worked as a janitor and medical technician.", + "length": 131 + }, + { + "text": "In 1996, she asked for a pardon saying the 'sentence did not fit the crime', also telling the newspaper: 'Everyone knows I never killed anyone.", + "length": 143 + }, + { + "text": "This meant she was eligible for parole in 1976 and was released after serving 17 years of her sentence because she had been considered a 'model prisoner'.", + "length": 154 + }, + { + "text": "' He added that he will survey the candidates for governor, attorney general and secretary of state — the three offices that comprise the Pardons Board.", + "length": 154 + }, + { + "text": "Two months later authorities found many other bodies including those of Fugate's stepfather Marion Bartlett, 57 and his 35-year-old wife Velda, in an outbuilding.", + "length": 162 + }, + { + "text": "Killer: Starkweather (pictured in a Converse County jail cell) was arrested following the two-month killing spree which lasted between December 1957 and January 1958.", + "length": 166 + }, + { + "text": "Defiance: Fugate (pictured with her late husband Frederick in 2013) has 
always maintained her innocence and has the backing of several attorneys who say her sentence was unjust .", + "length": 178 + }, + { + "text": "Mr Berry, a Lincoln attorney who published The Twelfth Victim earlier this year, claims Starkweather told investigators several times that Fugate was not involved in the slayings .", + "length": 181 + }, + { + "text": "Fugate, who says she was forced to accompany the killer, was found guilty of being an accessory to first-degree murder in connection with the death of 17-year-old victim Robert Jensen.", + "length": 184 + }, + { + "text": "Caril Ann Fugate, now 71, was on a two-month road trip with notorious mass murderer Charles Starkweather from Lincoln, Nebraska, to Wyoming when he killed 11 people between 1957 and 58.", + "length": 186 + }, + { + "text": "Sentence: After serving 17 months in prison, Starkweather was put to death in the electric chair in 1959 after being found guilty of killing Robert Jenson, the only murder he was tried for .", + "length": 190 + }, + { + "text": "Plea: Caril Ann Fugate (pictured left at 15) was on a road trip with mass murderer Charles Starkweather (then 19) between Lincoln, Nebraska, and Wyoming when he killed 11 people between 1957 and 1958 .", + "length": 202 + }, + { + "text": "A woman who was just 14 when she accompanied her 19-year-old boyfriend on a killing spree is set to ask for a pardon 57 years after she was jailed, insisting she didn't kill anybody and her sentence was unfair.", + "length": 210 + }, + { + "text": "Defense attorney John Stevens Berry, who wrote a book earlier this year suggesting Fugate was unfairly convicted, is backing her plea and is hoping the move will become a campaign issue in the Nebraska gubernatorial race, according to the Omaha World Herald.", + "length": 259 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6983405351638794 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:32.217580957Z", + 
"first_section_created": "2025-12-23T09:33:32.217967173Z", + "last_section_published": "2025-12-23T09:33:32.218279487Z", + "all_results_received": "2025-12-23T09:33:32.28531456Z", + "output_generated": "2025-12-23T09:33:32.285476467Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:32.217967173Z", + "publish_time": "2025-12-23T09:33:32.218279487Z", + "first_worker_start": "2025-12-23T09:33:32.218787909Z", + "last_worker_end": "2025-12-23T09:33:32.284104Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:32.218720706Z", + "start_time": "2025-12-23T09:33:32.218808409Z", + "end_time": "2025-12-23T09:33:32.218899113Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:32.219008Z", + "start_time": "2025-12-23T09:33:32.219159Z", + "end_time": "2025-12-23T09:33:32.284104Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:32.218776408Z", + "start_time": "2025-12-23T09:33:32.218855911Z", + "end_time": "2025-12-23T09:33:32.218952516Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:32.218725406Z", + "start_time": "2025-12-23T09:33:32.218787909Z", + "end_time": "2025-12-23T09:33:32.218839211Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + 
"sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4466, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/002c115c31577d0e9443794d27c2df51312858b4.json b/data/output/002c115c31577d0e9443794d27c2df51312858b4.json new file mode 100644 index 0000000..dce293a --- /dev/null +++ b/data/output/002c115c31577d0e9443794d27c2df51312858b4.json @@ -0,0 +1,406 @@ +{ + "file_name": "002c115c31577d0e9443794d27c2df51312858b4.txt", + "total_words": 863, + "top_n_words": [ + { + "word": "the", + "count": 32 + }, + { + "word": "a", + "count": 30 + }, + { + "word": "in", + "count": 24 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "delhi", + "count": 19 + }, + { + "word": "is", + "count": 15 + }, + { + "word": "s", + "count": 15 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "with", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "m.", + "length": 2 + }, + { + "text": "m.", + "length": 2 + }, + { + "text": "m.", + "length": 2 + }, + { + "text": ").", + "length": 2 + }, + { + "text": "m.", + "length": 2 + }, + { + "text": ").", + "length": 2 + }, + { + "text": "m.", + "length": 2 + }, + { + "text": 
"m.", + "length": 2 + }, + { + "text": "m.", + "length": 2 + }, + { + "text": "-9 p.", + "length": 5 + }, + { + "text": "-11 p.", + "length": 6 + }, + { + "text": "Chaat .", + "length": 7 + }, + { + "text": "-8:30 p.", + "length": 8 + }, + { + "text": "Samosa .", + "length": 8 + }, + { + "text": "Kebabs .", + "length": 8 + }, + { + "text": "Not done.", + "length": 9 + }, + { + "text": "Parantha .", + "length": 10 + }, + { + "text": "Ram laddoo .", + "length": 12 + }, + { + "text": "Faluda kulfi .", + "length": 14 + }, + { + "text": "Chola bhatura .", + "length": 15 + }, + { + "text": "(If there was such a thing.", + "length": 27 + }, + { + "text": "There are 57 varieties in all.", + "length": 30 + }, + { + "text": "A plate of ram laddoo is your fix.", + "length": 34 + }, + { + "text": "It works both as a lunch and dinner item.", + "length": 41 + }, + { + "text": "Traveling to Delhi and not trying the kebabs?", + "length": 45 + }, + { + "text": "Paranthe Wali Gali offers innovative parantha options.", + "length": 54 + }, + { + "text": "You can typically choose among mutton or chicken kebabs.", + "length": 56 + }, + { + "text": "After consuming mouth-burning delicacies it's kulfi time.", + "length": 57 + }, + { + "text": "It's a popular breakfast item in Delhi's Punjabi households.", + "length": 60 + }, + { + "text": ") range of chaat options do the trick for a satisfying dinner.", + "length": 62 + }, + { + "text": "Desperate for an energy boost after a tiring shopping session?", + "length": 62 + }, + { + "text": "No snack this, chola bhatura is for people with huge appetites.", + "length": 63 + }, + { + "text": "If you aren't lucky enough to see one, try it at Lajpat Nagar main market.", + "length": 74 + }, + { + "text": "Kulfi is India's local ice cream, made with milk and a smattering of dried fruits.", + "length": 82 + }, + { + "text": "Baba Nagpal Corner in Lajpat Nagar is arguably the best chola bhatura area in New Delhi.", + "length": 88 + }, + { + "text": "\" 
But skip the street food scene and you miss an essential part of the Delhi experience.", + "length": 88 + }, + { + "text": "Samosa is chaat's close competitor for the title of \"Definitive Delhi Street food\" title.", + "length": 89 + }, + { + "text": "The Chandni Chowk and Hazrat Nizamuddin areas house some of the best kebab outlets in Delhi.", + "length": 92 + }, + { + "text": "Prabhu Chaat Bhandar's (Dholpur House, Shahjahan Road, Khan Market, New Delhi; open daily 11 a.", + "length": 95 + }, + { + "text": "Best to try it at the make-shift ram laddoo stalls, sometimes on a bicycle, that are strewn across town.", + "length": 104 + }, + { + "text": ") and Bengali Sweet House (27-33, Bengali Market, Connaught Place, New Delhi; +91 11 2331 9224; open daily 8 a.", + "length": 111 + }, + { + "text": ") It is to India, perhaps, what momo dumplings are to Tibet and bagels are to certain parts of the United States.", + "length": 113 + }, + { + "text": "The queen of them all is faluda kulfi (faluda is a popular rose milk flavor dessert drink with vermicelli noodles).", + "length": 115 + }, + { + "text": "Here are seven street delicacies among Delhi's endless choices, including a mix of vegetarian, non-veg and dessert.", + "length": 115 + }, + { + "text": "But if you find yourself in Karol Bagh or Rajouri Garden, you could hop into any of the numerous chola bhatura shops.", + "length": 117 + }, + { + "text": "Travelers to the Indian capital may hesitate to try the city's famed street foods, fearing the notorious \"Delhi belly.", + "length": 118 + }, + { + "text": "Great places to try samosas in Delhi include Rewari Sweets (Sadar Bazar, Gurgaon, India; +91 124 232 1826; open daily 8 a.", + "length": 122 + }, + { + "text": "Served in silver-colored, throwaway plastic bowls, one serving of six-to-seven balls is a great power snack or even lunch.", + "length": 122 + }, + { + "text": "As most people know, samosas are deep-fried, triangular pastry pockets, packed with potato, 
peas, lentils and sometimes meat.", + "length": 125 + }, + { + "text": "(CNN) -- With the sweltering summer bidding adieu and pleasant autumn temperatures setting in, now's the time to explore New Delhi.", + "length": 131 + }, + { + "text": "Although they're often served as an appetizer at Indian restaurants around the world, they can be paired with chaat for a full meal.", + "length": 132 + }, + { + "text": "The trusted Krishna Di Kulfi in Pandara Road Market serves kulfi with the heavenly faluda, a beverage consisting of rose milk and vermicelli.", + "length": 141 + }, + { + "text": "A legacy left behind by the Mughals, who invaded India in the 16th century, the grilled meats served on skewers make the best on-the-go protein meals.", + "length": 150 + }, + { + "text": "Located in a remote corner in old Delhi's Chandni Chowk area, Paranthe Wali Gali (Lane of Paranthas) is home to some of Delhi's best-known parantha joints.", + "length": 155 + }, + { + "text": "Ram laddoo (\"laddoo\" is a name for sweet flour balls) are savory, deep-fried moong balls served with chili-coriander sauce and garnished with grated radish.", + "length": 156 + }, + { + "text": "The chapati's stouter, fancier cousin, parantha is a pan-fried flatbread generally stuffed with vegetables, such as mashed potato, grated cauliflower and radish.", + "length": 161 + }, + { + "text": "Fluffy, plain, flour bread combined with a chickpea curry, garnished with chopped onion and served with a tangy mango pickle, chola bhatura is a Delhi meal staple.", + "length": 163 + }, + { + "text": "Many Delhi chaat addicts flock to a narrow alley behind the Union Public Service Commission's office near Khan Market (Humayun Road, Pandara Flats, India Gate, New Delhi, India).", + "length": 178 + }, + { + "text": "Sitting under a tiny tin-roofed shop in and downing deep-fried potato patties floating in a blend of yogurt, spicy green and red sauces served in leaf bowls is a definitive Delhi experience that even 
hardcore locals can't get enough of.", + "length": 236 + }, + { + "text": "If you aren't up for the large crowds in these bustling locations, you can take refuge in the more accessible Qureshi's Corner in Greater Kailash II (8, Narmada Shopping Complex, Alaknanda, New Delhi; +91 11 2602 0563; open daily 7-11 p.", + "length": 237 + }, + { + "text": "As you plonk down in one of the Gali's tiny, crammed restaurants, you'll find a number of delicious offerings: Indian rabri (sweet yogurt) parantha, mirch (red pepper) parantha, and lemon parantha (prepared from lemon zest, and probably the best this street has to offer).", + "length": 272 + }, + { + "text": "Chaat is a collective term used to describe savory street dishes in India, especially these three: aloo tikki (described in the gallery above), dahi bhalla (cutlets of skinless black lentil-like gram submerged in yogurt) and papri chaat (a hybrid of aloo tikki and dahi bhalla with salty, plain biscuits thrown in).", + "length": 315 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.421882688999176 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:32.719136455Z", + "first_section_created": "2025-12-23T09:33:32.719464669Z", + "last_section_published": "2025-12-23T09:33:32.719820284Z", + "all_results_received": "2025-12-23T09:33:32.7855292Z", + "output_generated": "2025-12-23T09:33:32.78576111Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:32.719464669Z", + "publish_time": "2025-12-23T09:33:32.719820284Z", + "first_worker_start": "2025-12-23T09:33:32.720387308Z", + "last_worker_end": "2025-12-23T09:33:32.784552Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:32.720384508Z", + "start_time": 
"2025-12-23T09:33:32.720452911Z", + "end_time": "2025-12-23T09:33:32.720551615Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:32.720599Z", + "start_time": "2025-12-23T09:33:32.720736Z", + "end_time": "2025-12-23T09:33:32.784552Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:32.720339406Z", + "start_time": "2025-12-23T09:33:32.72042081Z", + "end_time": "2025-12-23T09:33:32.720530614Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:32.720327206Z", + "start_time": "2025-12-23T09:33:32.720387308Z", + "end_time": "2025-12-23T09:33:32.72043381Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + 
"average_section_size": 4953, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/002c715ea1428373cc432c9508d4a48d2e6069f4.json b/data/output/002c715ea1428373cc432c9508d4a48d2e6069f4.json new file mode 100644 index 0000000..63b6d2f --- /dev/null +++ b/data/output/002c715ea1428373cc432c9508d4a48d2e6069f4.json @@ -0,0 +1,278 @@ +{ + "file_name": "002c715ea1428373cc432c9508d4a48d2e6069f4.txt", + "total_words": 402, + "top_n_words": [ + { + "word": "the", + "count": 22 + }, + { + "word": "app", + "count": 13 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "we", + "count": 11 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "gmail", + "count": 8 + }, + { + "word": "it", + "count": 8 + }, + { + "word": "google", + "count": 7 + }, + { + "word": "was", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "\" The app ...", + "length": 13 + }, + { + "text": "\"UPDATE: DO NOT.", + "length": 16 + }, + { + "text": "Very disappointed.", + "length": 18 + }, + { + "text": "Sorry we messed up.", + "length": 19 + }, + { + "text": "\"Not worth loading.", + "length": 19 + }, + { + "text": "At least, until it wasn't.", + "length": 26 + }, + { + "text": "Then, a few minutes passed.", + "length": 27 + }, + { + "text": "\"Go get the iPhone app for Gmail!", + "length": 33 + }, + { + "text": "The tech blogosphere was delighted.", + "length": 35 + }, + { + "text": "\"We have pulled the app to fix the problem.", + "length": 43 + }, + { + "text": "at least the fully working version of it ...", + "length": 44 + }, + { + "text": "The Gmail app is really a piece of crud,\" he wrote.", + "length": 51 + }, + { + "text": "\" Google had obviously already noticed what Scoble did.", + "length": 55 + }, + { + "text": "\" popular blogger Robert Scoble wrote on his Google+ page.", + "length": 58 + }, + { + "text": "Everyone who's already installed the app can continue to use it.", + "length": 64 + }, + { 
+ "text": "\" Later, a fuller explanation was added to the original blog post.", + "length": 66 + }, + { + "text": "\"Earlier today we launched a new Gmail app for iOS,\" Google posted.", + "length": 67 + }, + { + "text": "Shortly after it was unveiled, the app was pulled from the Apple Store.", + "length": 71 + }, + { + "text": "But the new app promised a smoother experience with a host of new features.", + "length": 75 + }, + { + "text": "There was no word from Google Wednesday afternoon as to when the fixed app might be back up.", + "length": 92 + }, + { + "text": "(CNN) -- One of the most highly anticipated apps for Apple devices was made available on Wednesday.", + "length": 99 + }, + { + "text": "We've removed the app while we correct the problem, and we're working to bring you a new version soon.", + "length": 102 + }, + { + "text": "\"The iOS app we launched today contained a bug with notifications,\" Google posted on its Gmail Twitter account.", + "length": 111 + }, + { + "text": "\"We check email pretty much everywhere these days,\" Google content manager Matthew Izatt wrote on the Gmail blog.", + "length": 113 + }, + { + "text": "\"Unfortunately, it contained a bug which broke notifications and caused users to see an error message when first opening the app.", + "length": 129 + }, + { + "text": "\" Users of Apple's operating system could already access Gmail through a mobile site or set it up as their default e-mail account.", + "length": 130 + }, + { + "text": "\"And when we do, we want easy access to our important messages so we can respond quickly and get back to life -- or slinging birds at thieving green pigs.", + "length": 154 + }, + { + "text": "will send push notifications and sound cues when new messages are received, search your inbox for an e-mail, autocomplete e-mail addresses and upload photos to messages.", + "length": 169 + }, + { + "text": "Google announced a Gmail app for the iPhone, iPad and iPod Touch that was designed to make it 
easier for the service's more than 190 million users to navigate their mail.", + "length": 170 + }, + { + "text": "It will also feature the same Priority Inbox that Gmail's Web version has and add a new mobile interface that will allow users to navigate their inbox more quickly with touch controls.", + "length": 184 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5496125221252441 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:33.22064615Z", + "first_section_created": "2025-12-23T09:33:33.220996665Z", + "last_section_published": "2025-12-23T09:33:33.221174673Z", + "all_results_received": "2025-12-23T09:33:33.2801324Z", + "output_generated": "2025-12-23T09:33:33.280287007Z", + "total_processing_time_ms": 59, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:33.220996665Z", + "publish_time": "2025-12-23T09:33:33.221174673Z", + "first_worker_start": "2025-12-23T09:33:33.221730097Z", + "last_worker_end": "2025-12-23T09:33:33.279222Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:33.221768098Z", + "start_time": "2025-12-23T09:33:33.221837201Z", + "end_time": "2025-12-23T09:33:33.221886103Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:33.222002Z", + "start_time": "2025-12-23T09:33:33.22214Z", + "end_time": "2025-12-23T09:33:33.279222Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:33.221741597Z", + "start_time": "2025-12-23T09:33:33.221793499Z", + "end_time": "2025-12-23T09:33:33.221847102Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + 
"receive_time": "2025-12-23T09:33:33.221666294Z", + "start_time": "2025-12-23T09:33:33.221730097Z", + "end_time": "2025-12-23T09:33:33.221749098Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2277, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/002c962834b7886c600a31a35053543e324883bd.json b/data/output/002c962834b7886c600a31a35053543e324883bd.json new file mode 100644 index 0000000..5cd1bc8 --- /dev/null +++ b/data/output/002c962834b7886c600a31a35053543e324883bd.json @@ -0,0 +1,440 @@ +{ + "file_name": "002c962834b7886c600a31a35053543e324883bd.txt", + "total_words": 1034, + "top_n_words": [ + { + "word": "hinckley", + "count": 42 + }, + { + "word": "the", + "count": 41 + }, + { + "word": "to", + "count": 32 + }, + { + "word": "and", + "count": 29 + }, + { + "word": "a", + "count": 27 + }, + 
{ + "word": "he", + "count": 26 + }, + { + "word": "his", + "count": 20 + }, + { + "word": "in", + "count": 19 + }, + { + "word": "said", + "count": 19 + }, + { + "word": "of", + "count": 18 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "Dr.", + "length": 3 + }, + { + "text": "On July 29, St.", + "length": 15 + }, + { + "text": "\" \"The hospital doesn't know what Mr.", + "length": 37 + }, + { + "text": "hands with all his lawyers and sat down.", + "length": 40 + }, + { + "text": "Tyler Jones, director of psychiatry at St.", + "length": 42 + }, + { + "text": "That would be followed by six visits of 24 days.", + "length": 48 + }, + { + "text": "Elizabeths Hospital filed a proposal to increase that.", + "length": 54 + }, + { + "text": "Jones said he has interviewed Hinckley but has not treated him.", + "length": 63 + }, + { + "text": "He added that Hinckley is \"flawed\" but is \"fundamentally decent.", + "length": 64 + }, + { + "text": "The first step would allow Hinckley to have two visits of 17 days.", + "length": 66 + }, + { + "text": ", who shot President Ronald Reagan and three others in March 1981.", + "length": 66 + }, + { + "text": "Hinckley is thinking, and he wants it that way,\" the prosecutor said.", + "length": 69 + }, + { + "text": "\" Currently, Hinckley is allowed to visit his mother 10 days a month.", + "length": 69 + }, + { + "text": "Hinckley's mother is now 85 years old and is not a scheduled witness.", + "length": 69 + }, + { + "text": "The trial ended in a jury verdict of not guilty by reason of insanity.", + "length": 70 + }, + { + "text": "\" Since 1999, Levine said, Hinckley has been taking a drug called Risperdal.", + "length": 76 + }, + { + "text": "There is a definite possibility I will be killed in my attempt to get Reagan.", + "length": 77 + }, + { + "text": "\" Levine said that in the two and a half decades that Hinckley has been at St.", + "length": 78 + }, 
+ { + "text": "Prosecutors want to cross-examine Hinckley and his defense lawyers oppose that.", + "length": 79 + }, + { + "text": "But Jones said he's been in remission for both of those disorders for many years.", + "length": 81 + }, + { + "text": "He left a letter addressed to her in his Washington hotel room saying, \"Dear Jodie.", + "length": 83 + }, + { + "text": "\"This man is not dangerous and the evidence shows he is not dangerous,\" Levine said.", + "length": 84 + }, + { + "text": "District Judge Paul Friedman greeted Hinckley and he replied, saying, \"Good morning.", + "length": 84 + }, + { + "text": "He said Hinckley also suffers from narcissism, which has improved but is still present.", + "length": 87 + }, + { + "text": "Hinckley, who was 25 at the time of the shooting, was enamored of actress Jodie Foster.", + "length": 87 + }, + { + "text": "\" He said the hospital considers Hinckley \"a low risk of violence to himself and others.", + "length": 88 + }, + { + "text": "On Wednesday, Hinckley's lawyers said he is not dangerous and should eventually be released.", + "length": 92 + }, + { + "text": "Washington (CNN) -- A hearing continues Thursday to determine the future of John Hinckley Jr.", + "length": 93 + }, + { + "text": "Elizabeths, testified Hinckley also started taking Zoloft in 2005 after complaining about anxiety.", + "length": 98 + }, + { + "text": "On March 30, 1981, Hinckley waited for President Reagan to leave a Washington Hotel after a speech.", + "length": 99 + }, + { + "text": "Mental health experts and Secret Service agents will testify, along with Hinckley's brother and sister.", + "length": 103 + }, + { + "text": "\" According to the document, in June of 2009 he went on the Internet to find photos of his female dentist.", + "length": 106 + }, + { + "text": "All survived, but Brady suffered a serious head wound that permanently affected his mobility and his speech.", + "length": 108 + }, + { + "text": "According to Chasson, in the 
first instance in July, Hinckley was supposed to go to the movie \"Captain America.", + "length": 111 + }, + { + "text": "\"When he was caught, Hinckley claimed, falsely, that the dentist had invited him to view her personal photographs.", + "length": 114 + }, + { + "text": "Jones said Hinckley had been diagnosed years ago as suffering from depression and from an unspecified psychotic disorder.", + "length": 121 + }, + { + "text": "Medical websites describe Risperdal as an antipsychotic medication often used to treat bipolar disorder and schizophrenia.", + "length": 122 + }, + { + "text": "Jones said the staff had considered stronger action including the possibility of revoking Hinckley's privileges altogether.", + "length": 123 + }, + { + "text": "It's not clear how quickly the judge might issue a ruling on the hospital's plan to gradually allow Hinckley greater freedom.", + "length": 125 + }, + { + "text": "A September filing by prosecutors said Hinckley \"continues to be deceptive regarding his relationships with and interest in women.", + "length": 130 + }, + { + "text": "Although concerned Hinckley was not truthful about his activities, Jones said, \"We didn't feel this constituted an increased risk.", + "length": 130 + }, + { + "text": "Hinckley's attorney, Barry Levine, said the issue is not whether Hinckley has sometimes been deceptive but whether he is dangerous.", + "length": 131 + }, + { + "text": "\" Hinckley's defense team has listed him as a possible witness at the proceedings but has not revealed if he will definitely testify.", + "length": 133 + }, + { + "text": "\" Later when he saw his \"treatment team,\" Hinckley not only maintained he had gone to the movie, but he enthusiastically recommended it.", + "length": 136 + }, + { + "text": "Elizabeths Hospital in Washington to undergo treatment and during his visits outside that facility, there has \"not been a single act of violence.", + "length": 145 + }, + { + "text": "\" Asked about the photographs 
of the dentist, Jones said the photographs were of the woman graduating from dental school and were \"not salacious.", + "length": 145 + }, + { + "text": "But prosecutors are fighting that, saying Hinckley has been deceptive about his activities while on visits to his mother in Williamsburg, Virginia.", + "length": 147 + }, + { + "text": "\" After the judge and all the lawyers were in place, Hinckley, now 56, entered the court wearing a brown sports jacket, dark pants and a striped tie.", + "length": 149 + }, + { + "text": "He opened fire and hit Reagan, his press secretary James Brady, Secret Service agent Timothy McCarthy and Washington police officer Thomas Delahanty.", + "length": 149 + }, + { + "text": "Chasson also quoted from a 1987 diary entry by Hinckley in which he said \"psychiatry is a guessing game\" and doctors \"will never know the true John Hinckley.", + "length": 157 + }, + { + "text": "According to Jones, Hinckley's treatment team was informed by the Secret Service that Hinckley had not told the truth about his activities during several visits.", + "length": 161 + }, + { + "text": "Jones said the medical staff discussed this issue with Hinckley, who initially did not appear to view the issue as a big deal, but later understood it was a serious issue.", + "length": 171 + }, + { + "text": "A requirement of Hinckley's current visitation program is that plans be laid out detailing what he will do when on his own and that medical staff and the Secret Service are informed.", + "length": 182 + }, + { + "text": "According to the September government filing opposing the plan, the hospital would then \"be given the sole discretion to place Hinckley on convalescent leave in his mother's hometown.", + "length": 183 + }, + { + "text": "\" The staff decided to reduce Hinckley's Christmas visit to his mother from 10 days to five days, and he will not be allowed to have any unaccompanied activities during that December stay.", + "length": 188 + }, + { + "text": 
"After an expected week and half of testimony, a federal judge will consider whether Hinckley should eventually be released from a mental hospital, where he has been a patient since his 1982 trial.", + "length": 196 + }, + { + "text": "On several occasions in July and September, Hinckley was supposed to go to the movies or shopping but instead went to bookstores where he looked at books about Ronald Reagan and presidential assassins, Chasson said.", + "length": 215 + }, + { + "text": "In opening statements, prosecutor Sarah Chasson said Secret Service agents will testify they performed surveillance on Hinckley without his knowledge earlier this year when he was allowed what he was told was unsupervised free time in Williamsburg.", + "length": 248 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5900307595729828 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:33.721945037Z", + "first_section_created": "2025-12-23T09:33:33.723278894Z", + "last_section_published": "2025-12-23T09:33:33.723605208Z", + "all_results_received": "2025-12-23T09:33:33.8447424Z", + "output_generated": "2025-12-23T09:33:33.844919908Z", + "total_processing_time_ms": 122, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 121, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:33.723278894Z", + "publish_time": "2025-12-23T09:33:33.723524505Z", + "first_worker_start": "2025-12-23T09:33:33.724133631Z", + "last_worker_end": "2025-12-23T09:33:33.800662Z", + "total_journey_time_ms": 77, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:33.724414443Z", + "start_time": "2025-12-23T09:33:33.724555949Z", + "end_time": "2025-12-23T09:33:33.724662153Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:33.724888Z", + 
"start_time": "2025-12-23T09:33:33.725005Z", + "end_time": "2025-12-23T09:33:33.800662Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 75 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:33.724143931Z", + "start_time": "2025-12-23T09:33:33.724212934Z", + "end_time": "2025-12-23T09:33:33.72433844Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:33.723914121Z", + "start_time": "2025-12-23T09:33:33.724133631Z", + "end_time": "2025-12-23T09:33:33.724176533Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:33.723555806Z", + "publish_time": "2025-12-23T09:33:33.723605208Z", + "first_worker_start": "2025-12-23T09:33:33.724426643Z", + "last_worker_end": "2025-12-23T09:33:33.843882Z", + "total_journey_time_ms": 120, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:33.724672054Z", + "start_time": "2025-12-23T09:33:33.724708355Z", + "end_time": "2025-12-23T09:33:33.724733856Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:33.72482Z", + "start_time": "2025-12-23T09:33:33.724957Z", + "end_time": "2025-12-23T09:33:33.843882Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 118 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:33.724394142Z", + "start_time": "2025-12-23T09:33:33.724426643Z", + "end_time": "2025-12-23T09:33:33.724458645Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:33.724480146Z", + "start_time": "2025-12-23T09:33:33.724529248Z", + "end_time": "2025-12-23T09:33:33.724541648Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + 
"sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 193, + "min_processing_ms": 75, + "max_processing_ms": 118, + "avg_processing_ms": 96, + "median_processing_ms": 118, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3125, + "slowest_section_id": 1, + "slowest_section_time_ms": 120 + } +} diff --git a/data/output/002c98258b69f80d791fad20e11ae6fe4bc5ee65.json b/data/output/002c98258b69f80d791fad20e11ae6fe4bc5ee65.json new file mode 100644 index 0000000..4385132 --- /dev/null +++ b/data/output/002c98258b69f80d791fad20e11ae6fe4bc5ee65.json @@ -0,0 +1,540 @@ +{ + "file_name": "002c98258b69f80d791fad20e11ae6fe4bc5ee65.txt", + "total_words": 1266, + "top_n_words": [ + { + "word": "the", + "count": 84 + }, + { + "word": "to", + "count": 38 + }, + { + "word": "a", + "count": 30 + }, + { + "word": "in", + "count": 30 + }, + { + "word": "of", + "count": 27 + }, + { + "word": "and", + "count": 25 + }, + { + "word": "plate", + "count": 19 + }, + { + "word": "be", + "count": 15 + }, + { + "word": "s", + "count": 14 + }, + { + "word": "at", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "| 
.", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "' Rose said.", + "length": 12 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "What is this?", + "length": 13 + }, + { + "text": "Cousins in 2011.", + "length": 16 + }, + { + "text": "Tony Sanchez posted.", + "length": 20 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "'What's the game coming to?", + "length": 27 + }, + { + "text": "But not everyone is pleased.", + "length": 28 + }, + { + "text": "' 'What's the game coming to?", + "length": 29 + }, + { + "text": "05:07 EST, 12 December 2013 .", + "length": 29 + }, + { + "text": "04:46 EST, 12 December 2013 .", + "length": 29 + }, + { + "text": "'No more home plate collisions?!", + "length": 32 + }, + { + "text": "Some players spoke up on Twitter.", + "length": 33 + }, + { + "text": "plate collisions by 2015 latest .", + "length": 33 + }, + { + "text": "NFL quarterbacks are catchers now?", + "length": 34 + }, + { + "text": "MLB authorities hope to ban home .", + "length": 34 + }, + { + "text": "'You're not allowed to pitch inside.", + "length": 36 + }, + { + "text": "and three torn ligaments in his ankle.", + "length": 38 + }, + { + "text": "Rose says he opposes the rules change .", + "length": 39 + }, + { + "text": "' Oakland outfielder Josh Reddick wrote.", + "length": 40 + }, + { + "text": "' Baltimore manager Buck Showalter said.", + "length": 40 + }, + { + "text": "I thought baseball was doing pretty good.", + "length": 41 + }, + { + "text": "plate collisions in Major League Baseball .", + "length": 43 + }, + { + "text": "The incident fuelled a decision to ban home .", + "length": 45 + }, + { + "text": "Bone crunching: San Francisco Giants catcher .", + "length": 46 + }, + { + "text": "Posey is helped from the field: The collision .", + "length": 47 + }, + { + "text": "MLB intends to have varied tiers of punishment.", + "length": 47 + 
}, + { + "text": "'Does it include at every base or just home plate?", + "length": 50 + }, + { + "text": "Now you're not allowed to try to be safe at home plate?", + "length": 55 + }, + { + "text": "Drafting the big league rule figures to be complicated.", + "length": 55 + }, + { + "text": "The hitters wear more armor than the Humvees in Afghanistan.", + "length": 60 + }, + { + "text": "'Since 1869, baseball has been doing pretty well,' Rose said.", + "length": 61 + }, + { + "text": "Fosse injured a shoulder, and his career went into a tailspin.", + "length": 62 + }, + { + "text": "'I think there will be two levels of enforcement,' Alderson said.", + "length": 65 + }, + { + "text": "'We're going back 40 years ago, but the mindset has changed a bit.", + "length": 66 + }, + { + "text": "'What are they going to do next, you can't break up a double play?", + "length": 66 + }, + { + "text": "Buster Posey's leg snaps as he's struck by Florida Marlins' Scott .", + "length": 67 + }, + { + "text": "'The only rules they ever changed was the mound (height) and the DH.", + "length": 68 + }, + { + "text": "controversial game-winning run the 1970 All-Star game in Cincinnati.", + "length": 68 + }, + { + "text": "'Nothing better than getting run over and showing the umpire the ball.", + "length": 70 + }, + { + "text": "The union declined to comment, pending a review of the proposed change.", + "length": 71 + }, + { + "text": "ended his season after he suffered a broken bone in his lower left leg .", + "length": 72 + }, + { + "text": "'When I was growing up as a kid in Philadelphia, it was a badge of honour.", + "length": 74 + }, + { + "text": "Evidently the guys making all these rules never played the game of baseball.", + "length": 76 + }, + { + "text": "'This is, I think, in response to a few issues that have arisen,' Alderson said.", + "length": 80 + }, + { + "text": "': Cincinnati Reds' Pete Rose slams into Cleveland Indians' Ray Fosse to score a .", + "length": 82 + }, + { + 
"text": "' The announced has been met with a mixed response among current and former players.", + "length": 84 + }, + { + "text": "'So, for example, intentionally running over the catcher might result in an out call.", + "length": 85 + }, + { + "text": "Maybe I'm wrong about the attendance figures and the number of people going to ballgames.", + "length": 89 + }, + { + "text": "'One will be with respect to whether the runner is declared safe or out based on conduct.", + "length": 89 + }, + { + "text": "' Approval of the players' union is needed for the rules change to be effective for 2014.", + "length": 89 + }, + { + "text": "'The costs associated in terms of health and injury just no longer warrant the status quo.", + "length": 90 + }, + { + "text": "He said the change would go into effect for next season if the players' association approved.", + "length": 93 + }, + { + "text": "But in a sport filled with nostalgia, even Scioscia wouldn't mind seeing a few modifications.", + "length": 93 + }, + { + "text": "' Rose said in a telephone interview with The Associated Press after MLB announced its plan yesterday.", + "length": 102 + }, + { + "text": "Details must be sorted out, such as what should happen if a catcher blocks the plate without the ball.", + "length": 102 + }, + { + "text": "'The exact language and how exactly the rule will be enforced is subject to final determination,' he said.", + "length": 106 + }, + { + "text": "Pete Rose, who famously flattened Ray Fosse to score the winning run in the 1970 All-Star game, was bowled over.", + "length": 112 + }, + { + "text": "You were expected to hang in at the plate, and the runner was expected to do everything he could to tag the plate.", + "length": 114 + }, + { + "text": "' The umpire can call the runner out and also eject the player if contact is determined to be malicious or flagrant.", + "length": 116 + }, + { + "text": "' Former catchers Joe Girardi, Bruce Bochy and Mike Matheny - all now managing in the 
majors - attended yesterday's meeting.", + "length": 124 + }, + { + "text": "'One is just the general occurrence of injuries from these incidents at home plate that affect players, both runners and catchers.", + "length": 130 + }, + { + "text": "New York Mets general manager Sandy Alderson, chairman of the rules committee, made the announcement yesterday at the winter meetings.", + "length": 134 + }, + { + "text": "So I think that the enforcement will be on the field as well as subsequent consequences in the form of fines and suspensions and the like.", + "length": 138 + }, + { + "text": "Posey returned to win the NL batting title and MVP award in 2012, when he led the Giants to their second World Series title in three seasons.", + "length": 141 + }, + { + "text": "' Alderson said wording of the rules change will be presented to owners for approval at their January 16 meeting in Paradise Valley, Arizona.", + "length": 141 + }, + { + "text": "Safety and concern over concussions were major factors - fans still cringe at the memory of the season-ending hit Buster Posey absorbed in 2011.", + "length": 144 + }, + { + "text": "And also kind of the general concern about concussions that exists not only in baseball but throughout professional sports and amateur sports today.", + "length": 148 + }, + { + "text": "' Banned for life in 1989 following a gambling investigation, Rose insists Fosse was blocking the plate without the ball, which is against the rules.", + "length": 149 + }, + { + "text": "During his 13-year career as an All-Star catcher, Mike Scioscia earned a reputation for being as tough as anyone when it came to blocking home plate.", + "length": 149 + }, + { + "text": "Discussion to limit or ban collisions has intensified since May 2011, when Posey was injured as the Marlins' Scott Cousins crashed into him at the plate.", + "length": 153 + }, + { + "text": "Posey, San Francisco's All-Star catcher, sustained a broken bone in his lower left leg and three torn 
ligaments in his ankle, an injury that ended his season.", + "length": 158 + }, + { + "text": "'Ultimately what we want to do is change the culture of acceptance that these plays are ordinary and routine and an accepted part of the game,' said Alderson.", + "length": 158 + }, + { + "text": "Major League Baseball officials have announced that they intend to ban home plate collisions by 2015 at the latest in response to a series of bone-crunching injuries.", + "length": 166 + }, + { + "text": "'We're going to do fairly extensive review of the types of plays that occur at home plate to determine which we're going to find acceptable and which are going to be prohibited.", + "length": 177 + }, + { + "text": "In Game Five of this year's AL championship series, Detroit backstop Alex Avila was pulled a couple of innings after being run over at the plate by Boston's David Ross, a fellow catcher.", + "length": 186 + }, + { + "text": "'If the players' association were to disapprove, then the implementation of the rule would be suspended for one year, but could be implemented unilaterally after that time,' Alderson said.", + "length": 188 + }, + { + "text": "'I think everyone is in agreement that the mindless collisions at home plate where a catcher is being targeted by a runner, that needs to be addressed,' the Los Angeles Angels manager said.", + "length": 189 + }, + { + "text": "' The NCAA instituted a rule on collisions for the 2011 season, saying 'contact above the waist that was initiated by the base runner shall not be judged as an attempt to reach the base or plate.", + "length": 195 + }, + { + "text": "Meanwhile, the NFL was forced to reach a settlement last summer in a concussion-related lawsuit by former players for $765million, and a group of hockey players sued the NHL last month over brain trauma.", + "length": 203 + }, + { + "text": "Another heavy tackle: In Game Five of this year's AL championship series, Detroit backstop Alex Avila was pulled a couple of 
innings after being run over at the plate by Boston's David Ross, a fellow catcher .", + "length": 209 + }, + { + "text": "'I don't think it's completely sparked by anything that's happened in baseball as much as what's happening outside of baseball and how it's impacting people and impacting the welfare of each sport,' said Matheny, now managing the St Louis Cardinals.", + "length": 249 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.6044235229492188 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:34.224355771Z", + "first_section_created": "2025-12-23T09:33:34.224675585Z", + "last_section_published": "2025-12-23T09:33:34.225206008Z", + "all_results_received": "2025-12-23T09:33:34.305120333Z", + "output_generated": "2025-12-23T09:33:34.305339342Z", + "total_processing_time_ms": 80, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 79, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:34.224675585Z", + "publish_time": "2025-12-23T09:33:34.224983198Z", + "first_worker_start": "2025-12-23T09:33:34.225456919Z", + "last_worker_end": "2025-12-23T09:33:34.302307Z", + "total_journey_time_ms": 77, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:34.225426717Z", + "start_time": "2025-12-23T09:33:34.22548852Z", + "end_time": "2025-12-23T09:33:34.225588224Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:34.225781Z", + "start_time": "2025-12-23T09:33:34.22592Z", + "end_time": "2025-12-23T09:33:34.302307Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 76 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:34.225475919Z", + "start_time": "2025-12-23T09:33:34.225536922Z", + "end_time": "2025-12-23T09:33:34.225647127Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:34.225405916Z", + "start_time": "2025-12-23T09:33:34.225456919Z", + "end_time": "2025-12-23T09:33:34.225522421Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:34.225051701Z", + "publish_time": "2025-12-23T09:33:34.225206008Z", + "first_worker_start": "2025-12-23T09:33:34.225442918Z", + "last_worker_end": "2025-12-23T09:33:34.304458Z", + "total_journey_time_ms": 79, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:34.225585224Z", + "start_time": "2025-12-23T09:33:34.225664327Z", + "end_time": "2025-12-23T09:33:34.225705229Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:34.225781Z", + "start_time": "2025-12-23T09:33:34.225916Z", + "end_time": "2025-12-23T09:33:34.304458Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 78 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:34.225568923Z", + "start_time": "2025-12-23T09:33:34.225627626Z", + "end_time": "2025-12-23T09:33:34.225696529Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:34.225395516Z", + "start_time": "2025-12-23T09:33:34.225442918Z", + "end_time": "2025-12-23T09:33:34.225474819Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 154, + 
"min_processing_ms": 76, + "max_processing_ms": 78, + "avg_processing_ms": 77, + "median_processing_ms": 78, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3640, + "slowest_section_id": 1, + "slowest_section_time_ms": 79 + } +} diff --git a/data/output/002c998ff22fab91ef8cbee9910baee3f2eeda05.json b/data/output/002c998ff22fab91ef8cbee9910baee3f2eeda05.json new file mode 100644 index 0000000..84e4972 --- /dev/null +++ b/data/output/002c998ff22fab91ef8cbee9910baee3f2eeda05.json @@ -0,0 +1,294 @@ +{ + "file_name": "002c998ff22fab91ef8cbee9910baee3f2eeda05.txt", + "total_words": 568, + "top_n_words": [ + { + "word": "the", + "count": 23 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "of", + "count": 17 + }, + { + "word": "a", + "count": 16 + }, + { + "word": "her", + "count": 16 + }, + { + "word": "she", + "count": 15 + }, + { + "word": "was", + "count": 15 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "cliff", + "count": 10 + }, + { + "word": "husband", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Mr .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Anna Edwards .", + "length": 14 + }, + { + "text": "07:35 EST, 13 December 2013 .", + "length": 29 + }, + { + "text": "07:20 EST, 13 December 2013 
.", + "length": 29 + }, + { + "text": "rid of her partner once and for all'.", + "length": 37 + }, + { + "text": "The court heard just four months later .", + "length": 40 + }, + { + "text": "violence and somebody who was unlikely to speak to police'.", + "length": 59 + }, + { + "text": "Cliff was again seeing another man which was when she decided to 'be .", + "length": 70 + }, + { + "text": "kill her husband was because he was trusted as a friend, was a man of .", + "length": 71 + }, + { + "text": "Cliff wanted her husband dead after their volatile relationship broke down .", + "length": 76 + }, + { + "text": "The court heard how Cliff wanted her husband dead after their volatile relationship broke down.", + "length": 95 + }, + { + "text": "'This was a very rare case for us, and I have certainly never come across one like this before.", + "length": 95 + }, + { + "text": "Bowen, from Bilston, said: 'She said she wanted her partner killed, wiped off the face of the earth.", + "length": 100 + }, + { + "text": "Yesterday Cliff, from Aldridge, West Midlands, was jailed for six years at Wolverhampton Crown Court.", + "length": 101 + }, + { + "text": "In September last year she left the couple's home with another man but had moved back in by Christmas.", + "length": 102 + }, + { + "text": "Kelly Cliff, 40, plotted to kill Carl Gallagher, 47, after she started a series of relationships with other men.", + "length": 112 + }, + { + "text": "Officers revealed text messages between Cliff and Bowen’s phones proved she was serious about having him killed.", + "length": 114 + }, + { + "text": "'I have a reputation for kicking a** - using my fists - but I do not use tools and do not go around to kill people.", + "length": 115 + }, + { + "text": "'The evidence against her was overwhelming and I’m satisfied that she will now be spending several years behind bars.", + "length": 119 + }, + { + "text": "Cliff denied soliciting the murder of her husband but was found guilty by a jury 
following a four day trial last month.", + "length": 119 + }, + { + "text": "' The father-of-five was so shocked by the suggestion that he instead went to the police on April 5 and she was arrested.", + "length": 121 + }, + { + "text": "'I have no doubt that Cliff was serious in her plot to have her ex-husband killed, even though they had got back together.", + "length": 122 + }, + { + "text": "Prosecutor Hugh O’Brien-Quinn told the court: 'She wanted him dead - she wanted to be rid of him, rid of him permanently.", + "length": 123 + }, + { + "text": "'Over a number of years he had continually forgiven her but she continued to be unfaithful and wanted him out of the picture.", + "length": 125 + }, + { + "text": "Bowen described the moment she turned up at his home 'out of the blue' to ask how much it would cost to have her husband killed.", + "length": 128 + }, + { + "text": "A cheating wife has been jailed for trying to hire her friend to murder her husband after he found out she was having a string of affairs.", + "length": 138 + }, + { + "text": "The mother-of-two asked friend Dalton Bowen, 47, to arrange for her estranged husband to be 'wiped off the face of the earth' after their 22-year relationship turned sour.", + "length": 171 + }, + { + "text": "After the hearing Detective Constable James Connell, from West Midlands Police, said 'Cliff is a cold and callous individual who has now been convicted of a very serious offence.", + "length": 178 + }, + { + "text": "The court heard the mum-of-two asked friend Dalton Bowen, 45, to arrange her estranged husband's death as she wanted to end their 22-year relationship 'once and for all' 'The reason she chose Dalton Bowen to .", + "length": 209 + }, + { + "text": "Cliff has been jailed for six years after trying to hire a hitman to wipe her husband 'off the face of the earth' But Bowen turned his former neighbour over to police, telling officers: 'I may have a reputation for kicking a** - but I don’t go around killing 
people'.", + "length": 269 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5837534070014954 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:34.726026574Z", + "first_section_created": "2025-12-23T09:33:34.727815051Z", + "last_section_published": "2025-12-23T09:33:34.72803786Z", + "all_results_received": "2025-12-23T09:33:34.796897912Z", + "output_generated": "2025-12-23T09:33:34.797048918Z", + "total_processing_time_ms": 71, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:34.727815051Z", + "publish_time": "2025-12-23T09:33:34.72803786Z", + "first_worker_start": "2025-12-23T09:33:34.728676788Z", + "last_worker_end": "2025-12-23T09:33:34.795919Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:34.728618185Z", + "start_time": "2025-12-23T09:33:34.728676788Z", + "end_time": "2025-12-23T09:33:34.72873899Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:34.728782Z", + "start_time": "2025-12-23T09:33:34.728913Z", + "end_time": "2025-12-23T09:33:34.795919Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:34.728608385Z", + "start_time": "2025-12-23T09:33:34.728847395Z", + "end_time": "2025-12-23T09:33:34.729115506Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:34.728604984Z", + "start_time": "2025-12-23T09:33:34.728683388Z", + "end_time": "2025-12-23T09:33:34.728710889Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + 
"total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3147, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/002cceec34994ff6ff91f6232054e5f71b9eb4b5.json b/data/output/002cceec34994ff6ff91f6232054e5f71b9eb4b5.json new file mode 100644 index 0000000..7ee7f12 --- /dev/null +++ b/data/output/002cceec34994ff6ff91f6232054e5f71b9eb4b5.json @@ -0,0 +1,424 @@ +{ + "file_name": "002cceec34994ff6ff91f6232054e5f71b9eb4b5.txt", + "total_words": 992, + "top_n_words": [ + { + "word": "the", + "count": 50 + }, + { + "word": "he", + "count": 29 + }, + { + "word": "cain", + "count": 28 + }, + { + "word": "to", + "count": 28 + }, + { + "word": "and", + "count": 27 + }, + { + "word": "his", + "count": 22 + }, + { + "word": "said", + "count": 20 + }, + { + "word": "in", + "count": 19 + }, + { + "word": "that", + "count": 19 + }, + { + "word": "a", + "count": 18 + } + ], + "sorted_sentences": [ + { + "text": "Gloria!", + "length": 7 + }, + { + "text": "\" Texas 
Gov.", + "length": 12 + }, + { + "text": "\" Former Utah Gov.", + "length": 18 + }, + { + "text": "\"Ginger White respects Mr.", + "length": 26 + }, + { + "text": "The crowd chanted, \"Gloria!", + "length": 27 + }, + { + "text": "9 points, the newspaper said.", + "length": 29 + }, + { + "text": "\" before the candidate spoke.", + "length": 29 + }, + { + "text": "\"That in itself is not proof.", + "length": 29 + }, + { + "text": "He said the two were friends.", + "length": 29 + }, + { + "text": "Other candidates were quick to react.", + "length": 37 + }, + { + "text": "Michele Bachmann said in a statement.", + "length": 37 + }, + { + "text": "She described the affair as \"very casual.", + "length": 41 + }, + { + "text": "\"My wife now knows,\" he told the newspaper.", + "length": 43 + }, + { + "text": "would endorse before making his own decision.", + "length": 45 + }, + { + "text": "The poll has a sampling error of plus or minus 4.", + "length": 49 + }, + { + "text": "\" CNN's Rachel Streitfeld contributed to this report.", + "length": 53 + }, + { + "text": "My wife understands that I'm a soft-hearted, giving person.", + "length": 59 + }, + { + "text": "Rick Perry said he knew the Cains made a \"difficult decision.", + "length": 61 + }, + { + "text": "I understand his decision and wish him and his family the best.", + "length": 63 + }, + { + "text": "\" But he denied the relationship was sexual, as White contends.", + "length": 63 + }, + { + "text": "His catchy \"9-9-9\" economic plan is not going anywhere, he said.", + "length": 64 + }, + { + "text": "com, through which he will continue to advocate for his platform.", + "length": 65 + }, + { + "text": "\"My wife and I have talked about it, and I have explained it to her.", + "length": 68 + }, + { + "text": "\"Herman Cain provided an important voice to this process,\" Minnesota Rep.", + "length": 73 + }, + { + "text": "\"Your support has been unwavering and undying,\" Cain told his supporters.", + "length": 73 + }, 
+ { + "text": "\" In a fund-raising letter Tuesday night, Cain referred to White as \"troubled.", + "length": 78 + }, + { + "text": "He will endorse another of the Republican presidential hopefuls soon, he said.", + "length": 78 + }, + { + "text": "\" White issued a statement, through her attorney, after Cain's announcement Saturday.", + "length": 85 + }, + { + "text": "\"I send checks to a lot of people; I help a lot of people,\" Cain told Fox News on Thursday.", + "length": 91 + }, + { + "text": "He helped invigorate conservative voters and our nation with a discussion of major tax reform.", + "length": 94 + }, + { + "text": "\" He repeatedly called the allegations \"false and untrue,\" and added that \"the (media) spin hurts.", + "length": 98 + }, + { + "text": "Cain and his wife, Gloria, held hands as they walked up to the podium where Cain made his remarks in Atlanta.", + "length": 109 + }, + { + "text": "This week, White told the news media that she and Cain engaged in an on-and-off affair for more than 13 years.", + "length": 110 + }, + { + "text": "So the other allegation in terms of it being a 13-year physical relationship, that is her words against my word.", + "length": 112 + }, + { + "text": "Cain's decision regarding his campaign and indeed would have respected any decision he made,\" the statement said.", + "length": 113 + }, + { + "text": "Cain told the newspaper he would drop out of the race if his wife asked him to, but quickly added that she wouldn't.", + "length": 116 + }, + { + "text": "\" Cain's announcement came a month before the Iowa caucuses, the first formal test of the primary season, scheduled for January 3.", + "length": 130 + }, + { + "text": "Cain told the Union Leader in New Hampshire that he repeatedly gave White money to help her with \"month-to-month bills and expenses.", + "length": 132 + }, + { + "text": "\" \"I am not going to be silenced and I will not go away,\" Cain said, announcing what he called his Plan B: A website, 
TheCainSolutions.", + "length": 135 + }, + { + "text": "Two other women also have said Cain sexually harassed them while they worked at the association, but they have declined to be identified.", + "length": 137 + }, + { + "text": "Cain said he came to the decision after assessing the impact that the allegations were having on his wife, his family and his supporters.", + "length": 137 + }, + { + "text": "While he will still be able to raise and spend campaign funds because he did not officially drop out, Cain's White House bid is effectively over.", + "length": 145 + }, + { + "text": "\" In the interview, Cain said his wife, knew nothing about White nor his financial support for her until the mother of two came forward last week.", + "length": 146 + }, + { + "text": "\"That being said, she is disappointed that he has not apologized for the public statements he has made about her and other women who have spoken out.", + "length": 149 + }, + { + "text": "Even as he stepped aside under the weight of the allegations that have dogged him, Cain said that he was at \"peace with my God\" and \"peace with my wife.", + "length": 152 + }, + { + "text": "Jon Huntsman said Cain brought \"a unique and valuable voice to the debate over how to reform our country's uncompetitive tax code and turn around the economy.", + "length": 158 + }, + { + "text": "\"His ideas and energy generated tremendous enthusiasm for the conservative movement at a time it was so desperately needed to restore confidence in our country.", + "length": 160 + }, + { + "text": "Though Gloria Cain rarely makes public appearances or statements, she told Fox News last month that she believed the sexual harassment allegations were \"unfounded.", + "length": 163 + }, + { + "text": "\" Two women -- Sharon Bialek and Karen Kraushaar -- previously accused Cain of sexually harassing them in the 1990s while he was head of the National Restaurant Association.", + "length": 173 + }, + { + "text": "Respondents said they 
were most concerned that Cain does not understand important issues, but said the allegations against him contribute to their concern, the newspaper said.", + "length": 175 + }, + { + "text": "New Hampshire Republican officials who supported Cain began to survey their options Saturday, with several state representatives saying their support could go to Gingrich or Ron Paul.", + "length": 183 + }, + { + "text": "He said in the Thursday Union Leader interview that his wife's feelings, as well as the reaction from supporters and donors, would be important factors in deciding whether he will stay the race.", + "length": 194 + }, + { + "text": "\" Fellow Georgian Newt Gingrich said the \"9-9-9\" plan \"got our country talking about the critical issue of how to reform our tax code and he elevated the dialogue of the Republican presidential primary in the process.", + "length": 217 + }, + { + "text": "Cain told staffers earlier this week he was reassessing his campaign in the wake of White's allegation of an affair, and he acknowledged to reporters Wednesday that her account had led to a drop in contributions to his campaign.", + "length": 228 + }, + { + "text": "\" Recently, Cain acknowledged that Ginger White's allegations of an affair have led to a drop in campaign contributions, and a Des Moines Register poll showed his support among likely Republican Iowa caucus-goers has fallen to 8%, down from 23% in October.", + "length": 256 + }, + { + "text": "Atlanta (CNN) -- Republican presidential hopeful Herman Cain told supporters Saturday that he is suspending his presidential campaign, which has become hobbled in recent weeks by allegations of sexual harassment and an Atlanta woman's claim that they carried on a 13-year affair.", + "length": 279 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5886460393667221 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:35.228769923Z", + "first_section_created": 
"2025-12-23T09:33:35.229107037Z", + "last_section_published": "2025-12-23T09:33:35.229472953Z", + "all_results_received": "2025-12-23T09:33:35.333683819Z", + "output_generated": "2025-12-23T09:33:35.333873128Z", + "total_processing_time_ms": 105, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 104, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:35.229107037Z", + "publish_time": "2025-12-23T09:33:35.229363648Z", + "first_worker_start": "2025-12-23T09:33:35.229837168Z", + "last_worker_end": "2025-12-23T09:33:35.332849Z", + "total_journey_time_ms": 103, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:35.229821768Z", + "start_time": "2025-12-23T09:33:35.229913272Z", + "end_time": "2025-12-23T09:33:35.230014276Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:35.230206Z", + "start_time": "2025-12-23T09:33:35.230356Z", + "end_time": "2025-12-23T09:33:35.332849Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 102 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:35.229770965Z", + "start_time": "2025-12-23T09:33:35.229842469Z", + "end_time": "2025-12-23T09:33:35.230059978Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:35.229729264Z", + "start_time": "2025-12-23T09:33:35.229837168Z", + "end_time": "2025-12-23T09:33:35.229917572Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:35.22939885Z", + "publish_time": "2025-12-23T09:33:35.229472953Z", + "first_worker_start": "2025-12-23T09:33:35.229952873Z", + "last_worker_end": "2025-12-23T09:33:35.296872Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + 
"worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:35.229972574Z", + "start_time": "2025-12-23T09:33:35.230041077Z", + "end_time": "2025-12-23T09:33:35.230061278Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:35.230311Z", + "start_time": "2025-12-23T09:33:35.230423Z", + "end_time": "2025-12-23T09:33:35.296872Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:35.229985775Z", + "start_time": "2025-12-23T09:33:35.230049577Z", + "end_time": "2025-12-23T09:33:35.230080479Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:35.229923472Z", + "start_time": "2025-12-23T09:33:35.229952873Z", + "end_time": "2025-12-23T09:33:35.229962074Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 168, + "min_processing_ms": 66, + "max_processing_ms": 102, + "avg_processing_ms": 84, + "median_processing_ms": 102, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2899, + "slowest_section_id": 0, + "slowest_section_time_ms": 103 + } +} diff --git a/data/output/002cdacb3ec6cd2ee9c16b2ab1413c59577f8fca.json b/data/output/002cdacb3ec6cd2ee9c16b2ab1413c59577f8fca.json new file mode 100644 index 0000000..69c669f --- /dev/null +++ b/data/output/002cdacb3ec6cd2ee9c16b2ab1413c59577f8fca.json @@ -0,0 +1,282 @@ +{ + "file_name": "002cdacb3ec6cd2ee9c16b2ab1413c59577f8fca.txt", + "total_words": 559, + "top_n_words": [ + { + "word": "the", + "count": 33 + }, + { + "word": "of", + "count": 23 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "protein", + "count": 13 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "is", + "count": 12 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "juno", + "count": 10 + }, + { + "word": "on", + "count": 10 + }, + { + "word": "that", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Fiona Macrae .", + "length": 14 + }, + { + "text": "The discovery could also lead to new contraceptives.", + "length": 52 + }, + { + "text": "’ However, he questioned how many couples would benefit.", + "length": 58 + }, + { + "text": "‘We are now testing whether Juno is involved in these cases.", + "length": 62 + }, + { + "text": "Without Juno, the sperm and egg cannot fuse to create new life.", + "length": 63 + }, + { + "text": "‘Without this essential interaction, fertilisation just cannot happen.", + "length": 72 + }, + { + "text": "Called Juno, it 'mates' with a partner protein on sperm, at the moment of conception .", + "length": 86 + }, + { + "text": "Although they made eggs, sperm were unable to bind to them, the journal Nature reports.", + "length": 87 + }, + { + "text": "Protection: The protein may also play a role in developing contraceptives of the future .", + "length": 89 + }, + { + "text": "This 
ensures only sperm binds with the egg and increases the odds of a successful pregnancy.", + "length": 92 + }, + { + "text": "Importantly, mice that were unable to make Juno seemed perfectly healthy but were infertile.", + "length": 92 + }, + { + "text": "On finding Juno, they did a series of tests that proved it to be necessary for fertilisation.", + "length": 93 + }, + { + "text": "We may be able to use this discovery to improve fertility treatments and develop new contraceptives.", + "length": 100 + }, + { + "text": "The experiments also revealed that Juno is quickly removed from the surface of an egg after fertilisation occurs.", + "length": 113 + }, + { + "text": "British researchers have shown that the egg’s surface is studded with a protein that is essential for fertility.", + "length": 114 + }, + { + "text": "This would bypass the flawed Juno protein on the surface and could allow them to achieve their dream of motherhood.", + "length": 115 + }, + { + "text": "Hope: British researchers have discovered a protein on the surface of a woman's egg that is essential for fertility.", + "length": 116 + }, + { + "text": "Contraceptive jabs could also be used to control badgers, deer and other populations, rather than resorting to culling.", + "length": 119 + }, + { + "text": "The Sanger Institute team created an artificial version of the sperm protein and the used it to hunt out its egg version.", + "length": 121 + }, + { + "text": "Moving forward: Scientists now hope the discovery of the new protein could help improve fertility treatments such as IVF .", + "length": 122 + }, + { + "text": "’ Since 2005, when an essential protein was found sitting on head of sperm, the search has been on for its female counterpart.", + "length": 128 + }, + { + "text": "One of the secrets of creating new life has been discovered by scientists – bringing hope of better treatment for childless women.", + "length": 132 + }, + { + "text": "As they wouldn’t be based on hormones, they 
should be free of the mood-swings, acne and other unwelcome side-effects of existing drugs.", + "length": 137 + }, + { + "text": "’ Dr Allan Pacey, a Sheffield University fertility expert, said: ‘The identification of the Juno protein opens up many exciting prospects.", + "length": 142 + }, + { + "text": "‘Perhaps the most obvious biomedical application of this finding is whether screening for this protein could be used as a test of fertility.", + "length": 142 + }, + { + "text": "It is thought that a similar protein is found on women’s eggs – and that flaws in it could explain why some couples can’t have children.", + "length": 142 + }, + { + "text": "Christened Juno, after the Roman goddess of marriage and fertility, it ‘mates’ with a partner protein on sperm, at the moment of conception.", + "length": 144 + }, + { + "text": "If this is the case, women found to have a faulty Juno protein could be treated with ICSI, a special type of IVF, in which the sperm is injected directly into the egg.", + "length": 167 + }, + { + "text": "Researcher Dr Erin Bianchi said: ‘Infertility is becoming an increasing problem, especially in Western countries, and it is remarkable that 20 per cent of infertility cases have an unexplained cause.", + "length": 201 + }, + { + "text": "Researcher Gavin Wright, of the Wellcome Trust Sanger Institute near Cambridge, said: ‘We have solved a long-standing mystery in biology by identifying the molecules displayed on all sperm and egg that must bind each other at the moment we were conceived.", + "length": 257 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.45599064230918884 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:35.730240517Z", + "first_section_created": "2025-12-23T09:33:35.730583231Z", + "last_section_published": "2025-12-23T09:33:35.73079344Z", + "all_results_received": "2025-12-23T09:33:35.79215017Z", + "output_generated": "2025-12-23T09:33:35.792312177Z", + 
"total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:35.730583231Z", + "publish_time": "2025-12-23T09:33:35.73079344Z", + "first_worker_start": "2025-12-23T09:33:35.731361665Z", + "last_worker_end": "2025-12-23T09:33:35.79125Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:35.731349164Z", + "start_time": "2025-12-23T09:33:35.731404467Z", + "end_time": "2025-12-23T09:33:35.73148357Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:35.731591Z", + "start_time": "2025-12-23T09:33:35.731719Z", + "end_time": "2025-12-23T09:33:35.79125Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:35.731303062Z", + "start_time": "2025-12-23T09:33:35.731373665Z", + "end_time": "2025-12-23T09:33:35.731454769Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:35.731303062Z", + "start_time": "2025-12-23T09:33:35.731361665Z", + "end_time": "2025-12-23T09:33:35.731446868Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3405, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/002ce93dff111b41ca63c7cd6c4697415d2f2f18.json b/data/output/002ce93dff111b41ca63c7cd6c4697415d2f2f18.json new file mode 100644 index 0000000..ffd1114 --- /dev/null +++ b/data/output/002ce93dff111b41ca63c7cd6c4697415d2f2f18.json @@ -0,0 +1,422 @@ +{ + "file_name": "002ce93dff111b41ca63c7cd6c4697415d2f2f18.txt", + "total_words": 793, + "top_n_words": [ + { + "word": "the", + "count": 69 + }, + { + "word": "in", + "count": 21 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "of", + "count": 19 + }, + { + "word": "german", + "count": 17 + }, + { + "word": "s", + "count": 16 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "he", + "count": 13 + }, + { + "word": "was", + "count": 12 + }, + { + "word": "for", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "The U.", + "length": 6 + }, + { + "text": "When .", + "length": 6 + }, + { + "text": "John B.", + "length": 7 + }, + { + "text": "'This .", + "length": 7 + }, + 
{ + "text": "Germany .", + "length": 9 + }, + { + "text": "statement.", + "length": 10 + }, + { + "text": "Ambassador .", + "length": 12 + }, + { + "text": "ambassador .", + "length": 12 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Germany summoned the U.", + "length": 23 + }, + { + "text": "eavesdropping in Germany.", + "length": 25 + }, + { + "text": "States has been unwilling.", + "length": 26 + }, + { + "text": "Berlin has demanded that .", + "length": 26 + }, + { + "text": "Angela Merkel's mobile phone.", + "length": 29 + }, + { + "text": "talked about foreign affairs.", + "length": 29 + }, + { + "text": "the two countries over alleged U.", + "length": 33 + }, + { + "text": "Spiegel suggested he worked in the .", + "length": 36 + }, + { + "text": "intelligence contractor Edward Snowden .", + "length": 40 + }, + { + "text": "United States voluntarily, the source added.", + "length": 44 + }, + { + "text": "The suspect had offered his services to the .", + "length": 45 + }, + { + "text": "East German Stasi secret police and the Nazis.", + "length": 46 + }, + { + "text": "He was not a top agent,' said one of the politicians.", + "length": 53 + }, + { + "text": "Emerson was called in 'in connection with an investigation by .", + "length": 63 + }, + { + "text": "the federal prosecutor', the German Foreign Ministry said in a .", + "length": 64 + }, + { + "text": "National Security Agency, which included monitoring Chancellor .", + "length": 64 + }, + { + "text": "asked whether Merkel had discussed it with President Barack Obama .", + "length": 67 + }, + { + "text": "contact with the head of the intelligence service, Gerhard Schindler.", + "length": 69 + }, + { + "text": "during a phone conversation on Thursday night, he answered they had .", + "length": 69 + }, + { + "text": "mail room, while Die Welt newspaper reported he had worked in close .", + "length": 69 + }, + { + "text": "was a man who had no direct contact with the 
investigative committee...", + "length": 71 + }, + { + "text": "is particularly sensitive about surveillance because of abuses by the .", + "length": 71 + }, + { + "text": "in Berlin on Friday following the arrest, heightening friction between .", + "length": 72 + }, + { + "text": "affair risks further straining ties with Washington which were damaged .", + "length": 72 + }, + { + "text": "by revelations last year of mass surveillance of German citizens by the .", + "length": 73 + }, + { + "text": "The arrested man's exact position in the BND remains shrouded in mystery.", + "length": 73 + }, + { + "text": "envoy 'was asked to help in the swift clarification' of the case, it added.", + "length": 75 + }, + { + "text": "The arrested man’s exact position in the BND remains shrouded in mystery.", + "length": 75 + }, + { + "text": "Washington agree to a 'no-spy' agreement with its close ally, but the United .", + "length": 78 + }, + { + "text": "intelligence contractor Edward Snowden, two politicians, who asked to remain anonymous, said.", + "length": 93 + }, + { + "text": "National Security Agency, which included monitoring Chancellor Angela Merkel's mobile phone .", + "length": 93 + }, + { + "text": "The parliamentary committee investigating the NSA affair also holds some confidential meetings.", + "length": 95 + }, + { + "text": "However, German media launched into a fever of speculation today as to who he was and what he was allegedly doing.", + "length": 114 + }, + { + "text": "The United States embassy in Berlin, the State Department in Washington and the White House all declined to comment.", + "length": 116 + }, + { + "text": "Another paper said the affair will be the 'the biggest scandal involving a German-American double agent since the war'.", + "length": 119 + }, + { + "text": "Merkel's spokesman Steffen Seibert said: 'We don't take the matter of spying for foreign intelligence agencies lightly'.", + "length": 120 + }, + { + "text": "The 31-year-old German 
citizen was being questioned today on suspicion of snooping on Germany's parliamentary inquiry into the NSA affair.", + "length": 138 + }, + { + "text": "German authorities would not go into any details as to the identity of the man except that he worked for the BND intelligence service, Germany's equivalent to MI6.", + "length": 163 + }, + { + "text": "Phone hacked: The affair risks further straining ties with Washington which were damaged by revelations last year of mass surveillance of German citizens by the U.", + "length": 163 + }, + { + "text": "The man has admitted passing to an American contact details about a special German parliamentary committee set up to investigate the spying revelations made by former U.", + "length": 169 + }, + { + "text": "Both lawmakers are members of the nine-person parliamentary control committee, whose meetings are confidential, and which is in charge of monitoring German intelligence.", + "length": 169 + }, + { + "text": "Snooping: The 31-year-old German citizen was being questioned today on suspicion of snooping on Germany's parliamentary inquiry into the NSA affair (pictured) Links to the top?", + "length": 176 + }, + { + "text": "Confession: The man has admitted passing to an American contact details about a special German parliamentary committee set up to investigate the spying revelations made by former U.", + "length": 181 + }, + { + "text": "According to one German newspaper, he was first arrested amid suspicions he tried to make contact with Russian intelligence, only to confess he was in fact spying for the Americans.", + "length": 181 + }, + { + "text": "German detectives have arrested a man suspected of spying for the United States in what could prove the 'biggest scandal involving a German-American double agent since the Second World War'.", + "length": 190 + }, + { + "text": "Bild reported that the suspected mole was a double agent for the Americans for two years and passed them 218 secret documents in exchange for 
€25,000, including at least three documents related to the Bundestag NSA enquiry.", + "length": 225 + }, + { + "text": "Spiegel suggested he worked in the mail room, while Die Welt newspaper reported he had worked in close contact with the head of the intelligence service, Gerhard Schindler (pictured) The man was said to have met his contact in Austria and passed him the documents on a USB stick, the paper added.", + "length": 296 + }, + { + "text": "Double agent: German authorities would not go into any details as to the identity of the man except that he worked for the BND intelligence service, Germany's equivalent to MI6 (pictured: radomes that contain radar antennas stand at an operating facility of the BND, for whom the suspect works) 'The matter is serious, that is very clear,' a German government spokesman told Frankfurter Allgemeine Zeitung newspaper.", + "length": 416 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7002058029174805 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:36.231416598Z", + "first_section_created": "2025-12-23T09:33:36.231758313Z", + "last_section_published": "2025-12-23T09:33:36.232029724Z", + "all_results_received": "2025-12-23T09:33:36.293563162Z", + "output_generated": "2025-12-23T09:33:36.293713568Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:36.231758313Z", + "publish_time": "2025-12-23T09:33:36.232029724Z", + "first_worker_start": "2025-12-23T09:33:36.232430342Z", + "last_worker_end": "2025-12-23T09:33:36.292636Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:36.232473843Z", + "start_time": "2025-12-23T09:33:36.232555347Z", + "end_time": "2025-12-23T09:33:36.232653951Z", + "queue_wait_time_ms": 0, 
+ "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:36.232757Z", + "start_time": "2025-12-23T09:33:36.232915Z", + "end_time": "2025-12-23T09:33:36.292636Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:36.232434242Z", + "start_time": "2025-12-23T09:33:36.232512145Z", + "end_time": "2025-12-23T09:33:36.232647251Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:36.232354838Z", + "start_time": "2025-12-23T09:33:36.232430342Z", + "end_time": "2025-12-23T09:33:36.232469043Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4968, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git 
a/data/output/002cf22c0f5d4832df1dfacbbf14e6fd267b0eaf.json b/data/output/002cf22c0f5d4832df1dfacbbf14e6fd267b0eaf.json new file mode 100644 index 0000000..313ae15 --- /dev/null +++ b/data/output/002cf22c0f5d4832df1dfacbbf14e6fd267b0eaf.json @@ -0,0 +1,388 @@ +{ + "file_name": "002cf22c0f5d4832df1dfacbbf14e6fd267b0eaf.txt", + "total_words": 1009, + "top_n_words": [ + { + "word": "the", + "count": 46 + }, + { + "word": "of", + "count": 41 + }, + { + "word": "titan", + "count": 26 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "s", + "count": 22 + }, + { + "word": "a", + "count": 16 + }, + { + "word": "with", + "count": 16 + }, + { + "word": "be", + "count": 15 + }, + { + "word": "underground", + "count": 15 + }, + { + "word": "would", + "count": 15 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "slowly changing its composition.", + "length": 32 + }, + { + "text": "Jonathan O'Callaghan for MailOnline .", + "length": 37 + }, + { + "text": "Pictured is Titan (foreground) in orbit around Saturn.", + "length": 54 + }, + { + "text": "But scientists think it rains perhaps only every few decades.", + "length": 61 + }, + { + "text": "But scientists think it rains perhaps only every few decades .", + "length": 62 + }, + { + "text": "Importantly, this process would continue up to Titan's surface.", + "length": 63 + }, + { + "text": "It was found that rainfall runoff was likely feeding underground resevoirs.", + "length": 75 + }, + { + "text": "As well as this, they would fall more slowly, drifting down like snowflakes.", + "length": 76 + }, + { + "text": "As well as this, they would fall more slowly, drifting down like snowflakes.", + "length": 76 + }, + { + "text": "‘Now, we have a better idea of what these hidden lakes or oceans could be like.", + "length": 81 + }, + { + "text": "Titan's ‘water’ is liquid methane, CH4, better known on Earth as natural gas.", + "length": 81 + }, + { + "text": "Clathrates that contain 
methane are found on Earth in some polar and ocean sediments.", + "length": 85 + }, + { + "text": "A researcher in France says bodies of liquid on Titan may be fed by underground springs.", + "length": 88 + }, + { + "text": "Eventually the original methane aquifer would be turned into a propane or ethane aquifer.", + "length": 89 + }, + { + "text": "These in turn would change in chemical composition before adding to existing lakes and seas .", + "length": 93 + }, + { + "text": "These clathrate layers could remain stable as far down as several miles below Titan's surface.", + "length": 94 + }, + { + "text": "In other words, the composition of the lake could indicate what is happening deep underground.", + "length": 94 + }, + { + "text": "Importantly, the chemical transformations taking place underground would affect Titan's surface.", + "length": 96 + }, + { + "text": "’ The finding was made when studying how methane rain interacts with lakes on Titan (pictured).", + "length": 97 + }, + { + "text": "He examined how Titan’s methane rainfall would interact with icy materials in underground reservoirs.", + "length": 103 + }, + { + "text": "This artist's rendering shows a cross-section of the surface and subsurface of Titan Saturn's moon Titan.", + "length": 105 + }, + { + "text": "Regular Earth-water, H2O, would be frozen solid on Titan where the surface temperature is -180°C (-292°F).", + "length": 108 + }, + { + "text": "An 'alkanofer' is a resevoir of liquid hydrocarbons, while clathrates are compounds that trap other substances .", + "length": 112 + }, + { + "text": "With Titan's low gravity and dense atmosphere, methane raindrops could grow twice as large as Earth's raindrops.", + "length": 112 + }, + { + "text": "With Titan's low gravity and dense atmosphere, methane raindrops could grow twice as large as Earth's raindrops.", + "length": 112 + }, + { + "text": "Hundreds of lakes and seas are known of on Titan, Saturn's largest moon, filled not with water but with 
hydrocarbons such as methane.", + "length": 133 + }, + { + "text": "Clathrates are compounds in which water forms a crystal structure with small cages that trap other substances like methane and ethane.", + "length": 134 + }, + { + "text": "This means researchers could examine the composition of Titan's surface lakes to learn something about what is happening deep underground, said Mousis.", + "length": 151 + }, + { + "text": "They found that this diffusion could cause a new reservoir to form where the bottom of the original underground reservoir meets layers of non-porous ice.", + "length": 154 + }, + { + "text": "One of the peculiar properties of clathrates is that they trap and split molecules into a mix of liquid and solid phases, in a process called fractionation.", + "length": 156 + }, + { + "text": "Because Titan is smaller than Earth, its gravity doesn’t hold onto its gaseous envelope as tightly, so the atmosphere extends 370 miles (595 kilometres) into space.", + "length": 166 + }, + { + "text": "The results of this could be noticed on Titan's surface, meaning scientists may be able work out what is happening underground simply by looking at the moon from space.", + "length": 168 + }, + { + "text": "As on Earth, the climate is driven mostly by changes in the amount of sunlight that comes with the seasons, although the seasons on Titan are about seven Earth years long.", + "length": 171 + }, + { + "text": "In a study scientists found that subsurface 'springs' of propane and ethane may be contributing to some bodies of liquid - and the effects may be noticeable on the surface.", + "length": 172 + }, + { + "text": "The study was carried out by Dr Olivier Mousis, a research associate for the Cassini spacecraft, currently in orbit around Saturn, at the University of Franche-Comté, France.", + "length": 175 + }, + { + "text": "With its thick atmosphere and organic-rich chemistry, Titan resembles a frozen version of Earth several billion years ago, before life 
began pumping oxygen into our atmosphere.", + "length": 176 + }, + { + "text": "And with colleagues he found that runoff from this rainfall was likely leading to the formation of springs of propane and ethane, which in turn were feeding the lakes and seas.", + "length": 176 + }, + { + "text": "Most of these bodies of liquid are thought to be replenished by rainfall from clouds in the moon's atmosphere, but now research says underground reservoirs may also be feeding them.", + "length": 181 + }, + { + "text": "On Titan, the surface pressure and temperature should allow clathrates to form when liquid hydrocarbons come into contact with water ice, which is a major component of the moon's crust.", + "length": 185 + }, + { + "text": "The scientists modeled how a subsurface reservoir of liquid hydrocarbons, also called an 'alkanofer,' once filled with methane rainfall runoff, would diffuse through Titan's porous, icy crust.", + "length": 192 + }, + { + "text": "Lakes and rivers fed by springs from propane or ethane subsurface reservoirs would show the same kind of composition, whereas those fed by rainfall would be different and contain a significant fraction of methane.", + "length": 213 + }, + { + "text": "‘We knew that a significant fraction of the lakes on Titan's surface might possibly be connected with hidden bodies of liquid beneath Titan's crust, but we just didn't know how they would interact,’ said Mousis.", + "length": 215 + }, + { + "text": "Lakes fed by these propane or ethane subsurface reservoirs would show the same kind of composition, whereas those fed by rainfall would be different and contain methane, nitrogen, and trace amounts of argon and carbon monoxide.", + "length": 227 + }, + { + "text": "The process of underground reservoirs feeding lakes and seas (illustrated) could be spotted on the surface, meaning scientists may be able to work out what is happening underground on Titan just by observing the moon from space .", + "length": 229 + }, + { + 
"text": "Mousis and his colleagues came to the conclusion when they found that the formation of materials called clathrates changes the chemical composition of the rainfall runoff that feeds hydrocarbon 'aquifers' - reservoirs of liquid underground.", + "length": 240 + }, + { + "text": "'Our study shows that the composition of Titan's underground liquid reservoirs can change significantly through their interaction with the icy subsurface, provided the reservoirs are cut off from the atmosphere for some period of time,' said Dr Mathieu Choukroun of Nasa's Jet Propulsion Laboratory (JPL), one of three co-authors of the study with Dr Mousis.", + "length": 358 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5343802571296692 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:36.732853091Z", + "first_section_created": "2025-12-23T09:33:36.733201206Z", + "last_section_published": "2025-12-23T09:33:36.733604523Z", + "all_results_received": "2025-12-23T09:33:36.819142689Z", + "output_generated": "2025-12-23T09:33:36.819305796Z", + "total_processing_time_ms": 86, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 85, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:36.733201206Z", + "publish_time": "2025-12-23T09:33:36.733494818Z", + "first_worker_start": "2025-12-23T09:33:36.733900236Z", + "last_worker_end": "2025-12-23T09:33:36.818331Z", + "total_journey_time_ms": 85, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:36.734134246Z", + "start_time": "2025-12-23T09:33:36.734207349Z", + "end_time": "2025-12-23T09:33:36.734319153Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:36.734494Z", + "start_time": "2025-12-23T09:33:36.734624Z", + "end_time": "2025-12-23T09:33:36.818331Z", + 
"queue_wait_time_ms": 1, + "processing_time_ms": 83 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:36.733917336Z", + "start_time": "2025-12-23T09:33:36.733979239Z", + "end_time": "2025-12-23T09:33:36.734080443Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:36.733829833Z", + "start_time": "2025-12-23T09:33:36.733900236Z", + "end_time": "2025-12-23T09:33:36.733951838Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:36.73353902Z", + "publish_time": "2025-12-23T09:33:36.733604523Z", + "first_worker_start": "2025-12-23T09:33:36.734222949Z", + "last_worker_end": "2025-12-23T09:33:36.808167Z", + "total_journey_time_ms": 74, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:36.734191748Z", + "start_time": "2025-12-23T09:33:36.734222949Z", + "end_time": "2025-12-23T09:33:36.734253851Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:36.734497Z", + "start_time": "2025-12-23T09:33:36.73464Z", + "end_time": "2025-12-23T09:33:36.808167Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 73 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:36.734180948Z", + "start_time": "2025-12-23T09:33:36.73423735Z", + "end_time": "2025-12-23T09:33:36.734270351Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:36.734191648Z", + "start_time": "2025-12-23T09:33:36.73422775Z", + "end_time": "2025-12-23T09:33:36.73423745Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 156, + "min_processing_ms": 73, + "max_processing_ms": 83, + "avg_processing_ms": 78, + "median_processing_ms": 83, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3126, + "slowest_section_id": 0, + "slowest_section_time_ms": 85 + } +} diff --git a/data/output/002dea5d0cf140f042f2392fb7315a2f2f6a7e8f.json b/data/output/002dea5d0cf140f042f2392fb7315a2f2f6a7e8f.json new file mode 100644 index 0000000..21d2abd --- /dev/null +++ b/data/output/002dea5d0cf140f042f2392fb7315a2f2f6a7e8f.json @@ -0,0 +1,206 @@ +{ + "file_name": "002dea5d0cf140f042f2392fb7315a2f2f6a7e8f.txt", + "total_words": 197, + "top_n_words": [ + { + "word": "the", + "count": 10 + }, + { + "word": "to", + "count": 7 + }, + { + "word": "agger", + "count": 6 + }, + { + "word": "of", + "count": 6 + }, + { + "word": "a", + "count": 4 + }, + { + "word": "for", + "count": 4 + }, + { + "word": "in", + "count": 4 + }, + { + "word": "s", + "count": 4 + }, + { + "word": "an", + "count": 3 + }, + { + "word": "are", + "count": 3 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Who wants me?", + "length": 13 + }, + { + "text": "John Drayton .", + "length": 
14 + }, + { + "text": "VIDEO Scroll down to watch Daniel Agger score an own goal against Roma .", + "length": 72 + }, + { + "text": "Rebuffed: Barcelona have seen an approach for PSG defender Marquinhos turned down .", + "length": 83 + }, + { + "text": "The La Liga giants have seen an approach for PSG's Marquinhos rebuffed in recent days.", + "length": 86 + }, + { + "text": "Barcelona are considering a bid for Liverpool centre-half Daniel Agger, according to reports in Spain.", + "length": 102 + }, + { + "text": "Liverpool centre-back Daniel Agger could be on his way to Barcelona in a £12m deal, according to reports in the Spanish press .", + "length": 128 + }, + { + "text": "Sunday's edition of Sport claims the Catalans are lining up a £12m offer for the Denmark international as they look to strengthen their defence.", + "length": 145 + }, + { + "text": "Ajax's Daley Blind, Arsenal's Thomas Vermaelen and Jan Vertonghen of Tottenham are also being considered but Barca believe Agger, 29, represents the best value.", + "length": 160 + }, + { + "text": "Agger will likely find himself out of favour at Anfield next season following the arrival of Dejan Lovren from Southampton and a move to the Nou Camp could be appealing.", + "length": 169 + }, + { + "text": "Saying farewell: Denmark international Agger has been at Anfield since 2006 but is likely to be on the periphery of the squad next season following the arrival of Dejan Lovren .", + "length": 177 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5397916436195374 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:37.234389388Z", + "first_section_created": "2025-12-23T09:33:37.2346883Z", + "last_section_published": "2025-12-23T09:33:37.23491391Z", + "all_results_received": "2025-12-23T09:33:37.297802006Z", + "output_generated": "2025-12-23T09:33:37.297932011Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, 
+ "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:37.2346883Z", + "publish_time": "2025-12-23T09:33:37.23491391Z", + "first_worker_start": "2025-12-23T09:33:37.235520736Z", + "last_worker_end": "2025-12-23T09:33:37.296854Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:37.235472534Z", + "start_time": "2025-12-23T09:33:37.235547437Z", + "end_time": "2025-12-23T09:33:37.235574338Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:37.235725Z", + "start_time": "2025-12-23T09:33:37.235874Z", + "end_time": "2025-12-23T09:33:37.296854Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:37.235474834Z", + "start_time": "2025-12-23T09:33:37.235527436Z", + "end_time": "2025-12-23T09:33:37.235577338Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:37.235466034Z", + "start_time": "2025-12-23T09:33:37.235520736Z", + "end_time": "2025-12-23T09:33:37.235556038Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1164, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/002deab220fad1e2413d0793b6a052f2921380f1.json b/data/output/002deab220fad1e2413d0793b6a052f2921380f1.json new file mode 100644 index 0000000..d9db4c0 --- /dev/null +++ b/data/output/002deab220fad1e2413d0793b6a052f2921380f1.json @@ -0,0 +1,428 @@ +{ + "file_name": "002deab220fad1e2413d0793b6a052f2921380f1.txt", + "total_words": 922, + "top_n_words": [ + { + "word": "the", + "count": 68 + }, + { + "word": "of", + "count": 35 + }, + { + "word": "in", + "count": 26 + }, + { + "word": "a", + "count": 22 + }, + { + "word": "to", + "count": 22 + }, + { + "word": "war", + "count": 19 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "britain", + "count": 15 + }, + { + "word": "was", + "count": 15 + }, + { + "word": "have", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "It .", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "But .", + "length": 5 + }, + { + "text": "That .", + "length": 6 + }, + { + "text": "A final .", + "length": 9 + }, + { + "text": "Although .", + "length": 10 + }, + { + "text": "'Arguments .", + "length": 12 + }, + { + "text": "repelled and, as U.", + "length": 19 + }, + { + "text": "70million military personnel.", + "length": 29 + }, + { + "text": "shots of the First World War.", + "length": 29 + }, + { + "text": "think in the end Britain did.", + "length": 29 + }, + { + 
"text": "Mr Ferguson - who is Tisch Professor .", + "length": 38 + }, + { + "text": "The Great War - as it was soon to be known .", + "length": 44 + }, + { + "text": "lion' and accused him of historical ignorance.", + "length": 46 + }, + { + "text": "forces began to enter the trenches, the Allies .", + "length": 48 + }, + { + "text": "resorting to practically suicidal human wave attacks.", + "length": 53 + }, + { + "text": "the planet via the colonies of the European imperial powers.", + "length": 60 + }, + { + "text": "1918, the powers of Central Europe were exhausted by fighting.", + "length": 62 + }, + { + "text": "'Britain could indeed have lived with a German victory,' he said.", + "length": 65 + }, + { + "text": "weapons were not matched by changes in strategy, with both sides .", + "length": 66 + }, + { + "text": "left the British empire at the end of it all in a much weakened state.", + "length": 70 + }, + { + "text": "'The cost of the First World War to Britain was catastrophic, and it .", + "length": 70 + }, + { + "text": "- was the first military conflict to be fought on an industrial scale.", + "length": 70 + }, + { + "text": "the technological advances that led to increases in the lethality of .", + "length": 70 + }, + { + "text": "to rain down shells on Belgrade, the Serbian capital - the very first .", + "length": 71 + }, + { + "text": "was on the 28 July 1914 that artillery units of Austria-Hungary began .", + "length": 71 + }, + { + "text": "attack was to start a chain reaction that, within weeks, embroiled all .", + "length": 72 + }, + { + "text": "about honour, of course, resonate today, as they resonated in 1914 but .", + "length": 72 + }, + { + "text": "you can pay too high a price for upholding that notion of honour, and I .", + "length": 73 + }, + { + "text": "of the world's great powers into a global war which mobilised more than .", + "length": 73 + }, + { + "text": "much of the warfare took place in Europe, battle was soon joined across 
.", + "length": 73 + }, + { + "text": "of History at Harvard and a fellow of Jesus College, Oxford - continued: .", + "length": 74 + }, + { + "text": "last-ditch offensive along the Western Front by Germany was successfully .", + "length": 74 + }, + { + "text": "What's more, it would have been in Britain's interests to stay out in 1914.", + "length": 75 + }, + { + "text": "'What's more, it would have been in Britain's interests to stay out in 1914.", + "length": 76 + }, + { + "text": "staged a series of successful advances, forcing the enemy to surrender on November 11.", + "length": 86 + }, + { + "text": "Carnage: Soldiers carrying a stretcher on the front line in Flanders during the First World War .", + "length": 97 + }, + { + "text": "' Under fire: Education Secretary Michael Gove is embroiled in a political row about the war's origins .", + "length": 104 + }, + { + "text": "But his intervention in the First World War debate puts him on the side of figures such as Cambridge professor Sir Richard J.", + "length": 125 + }, + { + "text": "'A better strategy would have been to wait and deal with the German challenge later, when Britain could respond on its own terms.", + "length": 129 + }, + { + "text": "In an interview with BBC History magazine, the Scottish professor and author said: 'Britain could indeed have lived with a German victory.", + "length": 138 + }, + { + "text": "After Mr Ferguson's interview was released, other historians stuck up for Mr Gove's point of view and insisted that the war was justified.", + "length": 138 + }, + { + "text": "' Mr Ferguson concluded: 'We should not think of this as some great victory or dreadful crime, but more as the biggest error in modern history.", + "length": 143 + }, + { + "text": "'For Britain it would ultimately have been far better to have thought in terms of the national interest rather than in terms of a dated treaty.", + "length": 143 + }, + { + "text": "' He has previously been seen as an ally of Mr Gove, 
having advised the minister on the new history syllabus and spoken out in support of his educational reforms.", + "length": 162 + }, + { + "text": "'Realism in foreign policy has a long and distinguished tradition, not least in Britain - otherwise the French would never complain about \"perfidious Albion\",' he said.", + "length": 168 + }, + { + "text": "' Some have argued that Britain had a moral obligation to join the war because of its treaty with Belgium, which was invaded by Germany, but the historian dismissed this idea.", + "length": 175 + }, + { + "text": "He faced a backlash from some eminent historians, as well as his Labour opposite number Tristram Hunt - but Mr Ferguson is the first figure from the Right to speak out against his views.", + "length": 186 + }, + { + "text": "He is the latest historian to weigh in on the origins of the war after Michael Gove took aim at 'Left-wing academics' who have perpetuated a view of the conflict as an unmitigated disaster.", + "length": 189 + }, + { + "text": "Harvard professor Niall Ferguson argues that Britain should never have entered the war against Germany, saying that 'the cost was catastrophic' and it was not worth the millions of lives lost.", + "length": 192 + }, + { + "text": "' Mr Gove wrote in the Mail that the First World War was a 'just war', essential for 'defending the western liberal order' by constraining the imperial ambitions of Germany and Austria-Hungary.", + "length": 193 + }, + { + "text": "The Education Secretary blamed experts for the picture of the First World War put forward in popular culture such as Blackadder, which focuses on the casualties of battle and the blunders made by aristocratic officers.", + "length": 218 + }, + { + "text": "But Mr Ferguson, 49 - who is presenting a BBC2 series based on his 1998 book The Pity of War - disputed this judgement, saying that sending millions of troops to the continent was unnecessary to defend Britain's interests.", + "length": 222 + }, + { + 
"text": "Gary Sheffield, professor of war studies at the University of Wolverhampton, wrote on Twitter: 'Contrary to Niall Ferguson's views, for Britain to have stayed out of First World War would have disastrous for Britain and Europe.", + "length": 227 + }, + { + "text": "' 'Even if Germany had defeated France and Russia, it would have had a pretty massive challenge on its hands trying to run the new German-dominated Europe and would have remained significantly weaker than the British Empire in naval and financial terms.", + "length": 253 + }, + { + "text": "Speaking out: Niall Ferguson described the First World War as 'the biggest error in modern history' The political row over the First World War has intensified after one of Britain's best-known historians declared that the conflict was 'the biggest error in modern history'.", + "length": 273 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8450334668159485 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:37.735700875Z", + "first_section_created": "2025-12-23T09:33:37.736071691Z", + "last_section_published": "2025-12-23T09:33:37.736497909Z", + "all_results_received": "2025-12-23T09:33:37.825342317Z", + "output_generated": "2025-12-23T09:33:37.826224455Z", + "total_processing_time_ms": 90, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 88, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:37.736071691Z", + "publish_time": "2025-12-23T09:33:37.736331502Z", + "first_worker_start": "2025-12-23T09:33:37.737059333Z", + "last_worker_end": "2025-12-23T09:33:37.804991Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:37.737136036Z", + "start_time": "2025-12-23T09:33:37.737247041Z", + "end_time": "2025-12-23T09:33:37.737388347Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, 
+ "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:37.737343Z", + "start_time": "2025-12-23T09:33:37.737461Z", + "end_time": "2025-12-23T09:33:37.804991Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:37.737109835Z", + "start_time": "2025-12-23T09:33:37.737186138Z", + "end_time": "2025-12-23T09:33:37.737296743Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:37.73698583Z", + "start_time": "2025-12-23T09:33:37.737059333Z", + "end_time": "2025-12-23T09:33:37.737127636Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:37.736384404Z", + "publish_time": "2025-12-23T09:33:37.736497909Z", + "first_worker_start": "2025-12-23T09:33:37.737127736Z", + "last_worker_end": "2025-12-23T09:33:37.824405Z", + "total_journey_time_ms": 88, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:37.737134736Z", + "start_time": "2025-12-23T09:33:37.737189139Z", + "end_time": "2025-12-23T09:33:37.737202239Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:37.737336Z", + "start_time": "2025-12-23T09:33:37.737469Z", + "end_time": "2025-12-23T09:33:37.824405Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 86 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:37.737089334Z", + "start_time": "2025-12-23T09:33:37.737132736Z", + "end_time": "2025-12-23T09:33:37.737154337Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:37.737087034Z", + "start_time": "2025-12-23T09:33:37.737127736Z", + "end_time": "2025-12-23T09:33:37.737134036Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 153, + "min_processing_ms": 67, + "max_processing_ms": 86, + "avg_processing_ms": 76, + "median_processing_ms": 86, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2707, + "slowest_section_id": 1, + "slowest_section_time_ms": 88 + } +} diff --git a/data/output/002e4c66a7609bb3291ba367eec194a4468bb3ac.json b/data/output/002e4c66a7609bb3291ba367eec194a4468bb3ac.json new file mode 100644 index 0000000..7822cf3 --- /dev/null +++ b/data/output/002e4c66a7609bb3291ba367eec194a4468bb3ac.json @@ -0,0 +1,492 @@ +{ + "file_name": "002e4c66a7609bb3291ba367eec194a4468bb3ac.txt", + "total_words": 1235, + "top_n_words": [ + { + "word": "the", + "count": 39 + }, + { + "word": "a", + "count": 38 + }, + { + "word": "and", + "count": 36 + }, + { + "word": "to", + "count": 30 + }, + { + "word": "as", + "count": 19 + }, + { + "word": "i", + "count": 18 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "people", + "count": 18 + }, + { + "word": "of", + "count": 17 + 
}, + { + "word": "he", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Patrick Strudwick .", + "length": 19 + }, + { + "text": "I’m very proud of them.", + "length": 25 + }, + { + "text": "So there are pros and cos.", + "length": 26 + }, + { + "text": "‘I’m not going to lie.", + "length": 26 + }, + { + "text": "music drowns out my speech.", + "length": 27 + }, + { + "text": "17:00 EST, 15 February 2014 .", + "length": 29 + }, + { + "text": "17:01 EST, 15 February 2014 .", + "length": 29 + }, + { + "text": "‘I can sense condescension.", + "length": 29 + }, + { + "text": "‘It’s been hard,’ he says.", + "length": 32 + }, + { + "text": "’ Some women become maternal towards him.", + "length": 43 + }, + { + "text": "He already has a role model: his big brother.", + "length": 45 + }, + { + "text": "We have a right to fall in love,’ adds Sarah.", + "length": 47 + }, + { + "text": "’ Even making the first move has its obstacles.", + "length": 49 + }, + { + "text": "‘We’re all different and all part of society.", + "length": 49 + }, + { + "text": "They say, “Thanks for telling me,” or they’re intrigued.", + "length": 62 + }, + { + "text": "But there are advantages to being with another disabled person.", + "length": 63 + }, + { + "text": "And I also try to resist those who think I’m somehow inspiring.", + "length": 65 + }, + { + "text": "‘They’re writing off a tenth of the population,’ says Colin.", + "length": 66 + }, + { + "text": "But the Games hasn’t had as big an impact as we would have hoped.", + "length": 67 + }, + { + "text": "Colin is working on a PhD as well as appearing in the hit BBC show.", + "length": 67 + }, + { + "text": "And like his character, he’s looking for love and wanting a family.", + "length": 69 + }, + { + "text": "We can’t just do a one-off blast to inclusion 
– then it fades way.", + "length": 70 + }, + { + "text": "‘But that can also make you think you have more in common than you do.", + "length": 72 + }, + { + "text": "We need to put the time, effort and resources into continuing that legacy.", + "length": 74 + }, + { + "text": "We need disabled people to be valued as contributors to our whole society.", + "length": 74 + }, + { + "text": "’ Colin would like to have children and is optimistic about finding love.", + "length": 75 + }, + { + "text": "’ Mostly he is an old-fashioned romantic – although there can be pitfalls.", + "length": 78 + }, + { + "text": "‘So Call The Midwife is a big “I can do it even if you think I can’t”.", + "length": 78 + }, + { + "text": "It’s about a similar outlook and whether we can connect on an emotional level.", + "length": 80 + }, + { + "text": "They have a very happy, loving relationship despite discrimination on a few levels.", + "length": 83 + }, + { + "text": "The desperately sad scenario was a routine way of handling the situation in the 1950s.", + "length": 86 + }, + { + "text": "Colin, 27, was once told a career in showbusiness would be little more than a pipedream.", + "length": 88 + }, + { + "text": "‘Inclusion is the main principle disabled people – and society – should strive for.", + "length": 89 + }, + { + "text": "I just try to see the person as an individual, not according to what their disability is.", + "length": 89 + }, + { + "text": "He wasn’t expected to survive, but surpassed doctors’ expectations and learnt to walk and talk.", + "length": 99 + }, + { + "text": "Against the odds: Colin Young's cerebral palsy, has not stopped him from pursuing an acting career .", + "length": 100 + }, + { + "text": "‘Nathan, who’s 29, has arthritis and is in a civil partnership with a man who has cerebral palsy.", + "length": 101 + }, + { + "text": "I always tell women before we meet [about having cerebral palsy] and normally the reaction is positive.", + "length": 103 + }, + { + 
"text": "Love story: Colin as Jacob and Miranda Hart as Chummy in Sunday's controversial episode of Call The Midwife .", + "length": 109 + }, + { + "text": "His first girlfriend, for example, went to the same sixth-form boarding college for disabled pupils as he did.", + "length": 110 + }, + { + "text": "‘Also, buying someone a drink is tricky as I can’t carry a glass,’ he says, sipping tea through a straw.", + "length": 110 + }, + { + "text": "‘They said no one would want to watch a disabled person on TV,’ he reveals – declining to name the culprit.", + "length": 113 + }, + { + "text": "Online dating has proved fruitful, though, and he prefers to use generic sites rather than those for the disabled.", + "length": 114 + }, + { + "text": "Campaigners have welcomed the storyline, saying it showed how far we have come as a society in accepting diversity.", + "length": 115 + }, + { + "text": "‘You have a mutual understanding of what it’s like having an impairment so you don’t need to explain,’ he says.", + "length": 119 + }, + { + "text": "’ When dates don’t progress beyond a first meeting, he is left wondering whether it’s because of his cerebral palsy.", + "length": 122 + }, + { + "text": "‘But you can’t exactly ask people – they would get defensive as everyone likes to portray themselves as open-minded.", + "length": 122 + }, + { + "text": "'Jacob is a young guy wanting a family and being in love rather than just a person with a disability who has to be cared for.", + "length": 125 + }, + { + "text": "’ Colin was diagnosed with cerebral palsy aged two after a traumatic birth in which he was strangled by the umbilical cord.", + "length": 125 + }, + { + "text": "’ But a 2008 poll by ICM Research found 70 per cent of able-bodied people wouldn’t date someone with a physical disability.", + "length": 127 + }, + { + "text": "‘Hearing people talk about how attractive the athletes were, like Jonnie Peacock, was the best thing – that was incredible.", + "length": 127 + }, + 
{ + "text": "’ Both welcome anything that gets people talking about equality, and accepting that disabled people have romantic relationships.", + "length": 130 + }, + { + "text": "’ On-screen love: Jacob Milligan, played by Colin, and Sally Harper, played by Sarah Gordy, are forced apart when Sally falls pregnant .", + "length": 138 + }, + { + "text": "Sally’s parents are disgusted to discover she is expecting Jacob’s child, branding it unnatural and preventing him from seeing her again.", + "length": 141 + }, + { + "text": "‘There have been barriers to having romantic nights alone as neither of us can pour a glass of wine or eat alone, but laughing it off helps.", + "length": 142 + }, + { + "text": "‘Apart from one or two able-bodied people, I’ve mostly dated disabled people,’ he says, pointing out this is partly because of his environment.", + "length": 149 + }, + { + "text": "‘My brother and his partner are both disabled and I went to a school for disabled people so I grew up seeing disabled people fall in love,’ he says.", + "length": 152 + }, + { + "text": "’ Although we’ve witnessed a sea change in attitudes since the 1950s, Colin is deeply concerned about complacency settling in after the 2012 Paralympics.", + "length": 157 + }, + { + "text": "Colin says: ‘It was very courageous of Call The Midwife to bring to the forefront such an unspoken about topic as people with disabilities having relationships.", + "length": 162 + }, + { + "text": "His longest relationship has been 18 months, and the dating game for someone who has a personal assistant to help him dress, cook and eat has practical challenges.", + "length": 163 + }, + { + "text": "Yes, there have been barriers and I’ve worked hard to break them down, but I wouldn’t call that anything other than fighting for what is rightfully achievable.", + "length": 163 + }, + { + "text": "They play a young couple, Sally Harper and Jacob Milligan, who are both ‘patients’ in a home for the disabled – forced apart, and 
mocked when she becomes pregnant.", + "length": 169 + }, + { + "text": "But even 60 years after the drama is set, portrayals of sex and love among people with disabilities are so rare it provokes strong feelings – something that baffles Colin.", + "length": 173 + }, + { + "text": "Starved of oxygen, he suffered brain damage that led to his condition – which affects the way the brain communicates with the muscles, causing stiffness and problems with movement.", + "length": 182 + }, + { + "text": "Yet the actors – who have the same disabilities as their on-screen personas, Sarah with Down’s syndrome and Colin with cerebral palsy – believe there’s still a long way to go.", + "length": 183 + }, + { + "text": "’ Sarah, 25, adds: ‘Although I thank God I wasn’t born at that time, and I got very angry when reading the script about people being put in institutions, some of those attitudes are still around.", + "length": 201 + }, + { + "text": "‘But relationships among disabled people are still taboo, although with shows like The Undateables [Channel 4’s programme about people with disabilities dating] able-bodied people are becoming intrigued.", + "length": 207 + }, + { + "text": "Today, as we sit in a pub in North London, he speaks more slowly and quietly than most – and also with more effort, as cerebral palsy can affect the vocal cords – but this doesn’t dampen his vociferousness.", + "length": 212 + }, + { + "text": "As the stars of tonight’s controversial and much talked-about episode of  Call The Midwife – watched by about nine million viewers each week – actors Colin Young and Sarah Gordy are unarguably hot property.", + "length": 213 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5235470086336136 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:38.237452481Z", + "first_section_created": "2025-12-23T09:33:38.237837497Z", + "last_section_published": "2025-12-23T09:33:38.238277916Z", + 
"all_results_received": "2025-12-23T09:33:38.33472515Z", + "output_generated": "2025-12-23T09:33:38.33496096Z", + "total_processing_time_ms": 97, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 96, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:38.237837497Z", + "publish_time": "2025-12-23T09:33:38.238094308Z", + "first_worker_start": "2025-12-23T09:33:38.238657233Z", + "last_worker_end": "2025-12-23T09:33:38.33382Z", + "total_journey_time_ms": 95, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:38.238754137Z", + "start_time": "2025-12-23T09:33:38.238808539Z", + "end_time": "2025-12-23T09:33:38.238936644Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:38.238946Z", + "start_time": "2025-12-23T09:33:38.2391Z", + "end_time": "2025-12-23T09:33:38.33382Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 94 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:38.238629831Z", + "start_time": "2025-12-23T09:33:38.238704135Z", + "end_time": "2025-12-23T09:33:38.238846741Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:38.238582929Z", + "start_time": "2025-12-23T09:33:38.238657233Z", + "end_time": "2025-12-23T09:33:38.238715335Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:38.238186712Z", + "publish_time": "2025-12-23T09:33:38.238277916Z", + "first_worker_start": "2025-12-23T09:33:38.238792338Z", + "last_worker_end": "2025-12-23T09:33:38.314108Z", + "total_journey_time_ms": 75, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:38.238735536Z", + "start_time": 
"2025-12-23T09:33:38.238792338Z", + "end_time": "2025-12-23T09:33:38.23883444Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:38.23899Z", + "start_time": "2025-12-23T09:33:38.239127Z", + "end_time": "2025-12-23T09:33:38.314108Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 74 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:38.238776138Z", + "start_time": "2025-12-23T09:33:38.238813939Z", + "end_time": "2025-12-23T09:33:38.238869842Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:38.238774338Z", + "start_time": "2025-12-23T09:33:38.238872242Z", + "end_time": "2025-12-23T09:33:38.238914244Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 168, + "min_processing_ms": 74, + "max_processing_ms": 94, + "avg_processing_ms": 84, + "median_processing_ms": 94, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + 
"average_section_size": 3594, + "slowest_section_id": 0, + "slowest_section_time_ms": 95 + } +} diff --git a/data/output/002ef6c89a4b8a2a2eec6eba55b54b313443146f.json b/data/output/002ef6c89a4b8a2a2eec6eba55b54b313443146f.json new file mode 100644 index 0000000..e6e2178 --- /dev/null +++ b/data/output/002ef6c89a4b8a2a2eec6eba55b54b313443146f.json @@ -0,0 +1,310 @@ +{ + "file_name": "002ef6c89a4b8a2a2eec6eba55b54b313443146f.txt", + "total_words": 674, + "top_n_words": [ + { + "word": "the", + "count": 36 + }, + { + "word": "and", + "count": 25 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "a", + "count": 16 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "they", + "count": 13 + }, + { + "word": "back", + "count": 11 + }, + { + "word": "for", + "count": 10 + }, + { + "word": "had", + "count": 10 + }, + { + "word": "of", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "com reported.", + "length": 13 + }, + { + "text": "Lydia Warren .", + "length": 14 + }, + { + "text": "to kill and rob Justin Back.", + "length": 28 + }, + { + "text": "11:59 EST, 25 February 2014 .", + "length": 29 + }, + { + "text": "13:16 EST, 25 February 2014 .", + "length": 29 + }, + { + "text": "They could face the death penalty .", + "length": 35 + }, + { + "text": "They were arrested and admitted to the killing.", + "length": 47 + }, + { + "text": "'They set out to do this,' Fornshell said, Cincinnati.", + "length": 54 + }, + { + "text": "'We believe it to be a situation where they simply wanted .", + "length": 59 + }, + { + "text": "He had been classmates with Myers at Waynesville High School.", + "length": 61 + }, + { + "text": "They are being held in the Warren County Jail on $1 million bonds.", + "length": 66 + }, + { + "text": "The decision was made with the support of the Back 
family, he said.", + "length": 67 + }, + { + "text": "I can't imagine too many things worse than what they've gone through.", + "length": 69 + }, + { + "text": "'It's just going to be a long time for the family to move forward to some degree.", + "length": 81 + }, + { + "text": "'The family is still reeling from this and they will be for quite some time,' Sims said.", + "length": 88 + }, + { + "text": "They then allegedly bought supplies so that they could make a 'clean kill', the told investigators.", + "length": 99 + }, + { + "text": "Warren County Sheriff Larry Sims added to the news site that he was pleased the death penalty is being sought.", + "length": 110 + }, + { + "text": "They then allegedly took a gun, a safe and some of their victim's clothing to make it look like he had run away from home.", + "length": 122 + }, + { + "text": "Investigations uncovered that a violent robbery had taken place at the home and that Mosley and Myers had been there a day earlier.", + "length": 131 + }, + { + "text": "He had joined the Navy just before his death and was going to leave in two weeks for Recruit Training Command at Great Lakes, Illinois.", + "length": 135 + }, + { + "text": "Myers allegedly shot Back's corpse with the gun and then the pair dumped his body beneath a bridge in Preble County, investigators said.", + "length": 136 + }, + { + "text": "Fornshell said he believes the grand jury included multiple death penalty specifications in the indictment due to teenagers' alleged premeditation.", + "length": 147 + }, + { + "text": "Scene: Although the suspects tried to make it look as if Back had run away, investigators believed a violent robbery had taken place at the home in Waynesville, Ohio .", + "length": 167 + }, + { + "text": "Back's parents returned to the home and alerted police after finding items destroyed or missing and no sign of their son, who was about to move away for Naval training.", + "length": 168 + }, + { + "text": "Distraught: Back''s parents 
(pictured with Justin, second right, and one of his brothers) are in support of the death penalty for the suspects, the county sheriff said .", + "length": 169 + }, + { + "text": "' Justin Back, who leaves behind two brothers and three sisters, had worked at a local McDonalds for three years before graduating from high school in 2013, his obituary said.", + "length": 175 + }, + { + "text": "' 'Killers': Timothy Mosley, left, and Austin Myers, right, both 19, allegedly choked and stabbed an 18-year-old classmate before shooting his body and dumping it beneath a bridge.", + "length": 180 + }, + { + "text": "The indictments include two counts of aggravated murder, counts of aggravated robbery and burglary, grand theft of a firearm, tampering with evidence, safecracking and abuse of a corpse.", + "length": 186 + }, + { + "text": "Prosecutors are seeking the death penalty for two teenagers who allegedly stabbed a classmate to death after they watched movies together - simply because they wanted to kill and rob him.", + "length": 187 + }, + { + "text": "The duo returned to Back's home the following afternoon and watched movies with him before attacking him in the kitchen, trying to choke him and stabbing him repeatedly, investigators said.", + "length": 189 + }, + { + "text": "Victim: Justin Back (pictured above), who had attended high school with Myers before graduating in 2013, had just joined the Navy and was two weeks away from moving to Illinois for training .", + "length": 191 + }, + { + "text": "A grand jury released nine indictments against the two teens on Monday and Warren County Prosecutor David Fornshell said he will be seeking the death penalty due to the cruelty of the killing.", + "length": 192 + }, + { + "text": "Timothy Mosley, 19, and Austin Myers, 19, allegedly tried to choke Justin Back, 18, at his home in Waynesville, Ohio in January before stabbing him, shooting his body and dumping it under a bridge.", + "length": 197 + }, + { + "text": "Moseley 
and Myers, who is listed as homeless in court records, visited the home Back - who was just days away from leaving for the Navy - shared with his parents in Wayne County on January 27 so they could prepare for the robbery.", + "length": 230 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6914864182472229 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:38.739044665Z", + "first_section_created": "2025-12-23T09:33:38.739353277Z", + "last_section_published": "2025-12-23T09:33:38.739666391Z", + "all_results_received": "2025-12-23T09:33:38.80830486Z", + "output_generated": "2025-12-23T09:33:38.808484368Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:38.739353277Z", + "publish_time": "2025-12-23T09:33:38.739666391Z", + "first_worker_start": "2025-12-23T09:33:38.740190312Z", + "last_worker_end": "2025-12-23T09:33:38.807328Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:38.740150411Z", + "start_time": "2025-12-23T09:33:38.740215214Z", + "end_time": "2025-12-23T09:33:38.740295217Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:38.740406Z", + "start_time": "2025-12-23T09:33:38.740543Z", + "end_time": "2025-12-23T09:33:38.807328Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:38.74012681Z", + "start_time": "2025-12-23T09:33:38.740190312Z", + "end_time": "2025-12-23T09:33:38.740269616Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:38.74013621Z", + "start_time": 
"2025-12-23T09:33:38.740216614Z", + "end_time": "2025-12-23T09:33:38.740266016Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3914, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/002f609ecac757cd3ab05f8b7762859af129b750.json b/data/output/002f609ecac757cd3ab05f8b7762859af129b750.json new file mode 100644 index 0000000..997ebe1 --- /dev/null +++ b/data/output/002f609ecac757cd3ab05f8b7762859af129b750.json @@ -0,0 +1,250 @@ +{ + "file_name": "002f609ecac757cd3ab05f8b7762859af129b750.txt", + "total_words": 629, + "top_n_words": [ + { + "word": "the", + "count": 42 + }, + { + "word": "to", + "count": 18 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "obama", + "count": 14 + }, + { + "word": "of", + "count": 13 + }, + { + "word": "s", + "count": 11 + 
}, + { + "word": "that", + "count": 10 + }, + { + "word": "at", + "count": 9 + }, + { + "word": "hispanic", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "C.", + "length": 2 + }, + { + "text": "public for another shot at reform.", + "length": 34 + }, + { + "text": "Angry: The activist, idenitifed as Blanca Hernandez of Washington D.", + "length": 68 + }, + { + "text": ", was shouting remarks at Obama before she was escorted from the premises by security .", + "length": 87 + }, + { + "text": "Obama has missed the annual gala held by the Congressional Hispanic Caucus the last two years.", + "length": 94 + }, + { + "text": "The president reiterated on Thursday he would take action before the end of the year, a pledge that irritated Republicans.", + "length": 122 + }, + { + "text": "Obama's speech was interrupted by a heckler, identified as Blanca Hernandez, who gave him a hard time for failing to take executive action on deportations.", + "length": 155 + }, + { + "text": "'I know there's deep frustration in many communities around the country right now, and I understand that frustration because I share it,' he said at the Washington gala.", + "length": 169 + }, + { + "text": "Opinion: Hernandez heckled Obama over lack of executive action on deportation during the Congressional Hispanic Caucus Institute Awards Gala at the Washington Convention Center .", + "length": 179 + }, + { + "text": "Obama had promised to take that step before the end of the summer, only to delay it because of fears it could hurt Democrats running for election in conservative states in November.", + "length": 181 + }, + { + "text": "Rallying support: US President Barack Obama delivers remarks on immigration reform at the Congressional Hispanic Caucus Institute's 37th Annual Awards Gala in Washington DC on Thursday .", + "length": 186 + }, + { + "text": "'But if anybody wants to know where my heart is or whether I want to have this fight, let me 
put those questions to rest right now: I am not going to give up this fight until it gets done.", + "length": 188 + }, + { + "text": "But the rest of the crowd largely embraced the president, representing another twist in a relationship that at times resembles that of a married couple repeatedly squabbling and making up.", + "length": 188 + }, + { + "text": "Latinos remain critical to Democrats' hopes of holding onto the White House, not to mention Obama's legacy, so the president said he would use the coming weeks to gin up support among the U.", + "length": 190 + }, + { + "text": "Heckler: A woman who was shouting at President Barack Obama as he addressed the Congressional Hispanic Caucus Institute's 37th annual awards gala is removed from the banquet hall in Washington .", + "length": 194 + }, + { + "text": "Democratic Representative Luis Gutierrez of Illinois, a leading advocate for comprehensive immigration legislation, illustrated the difficult ties Hispanic-Americans have had with Obama during the past six years.", + "length": 212 + }, + { + "text": "Speaking at an annual gala held by Hispanic lawmakers, Obama said he shared the group's frustration but needed its support to make any reform last beyond his presidency, which concludes in a little more than two years.", + "length": 218 + }, + { + "text": "President Barack Obama sought to rally frustrated Latinos on Thursday with a firm promise to keep fighting for immigration reform and a renewed pledge to take executive action this year to reduce deportations of undocumented immigrants.", + "length": 236 + }, + { + "text": "After enraging many Hispanic voters last month by delaying a change in immigration policies until after the November midterm elections, Obama has faced widespread condemnation from a voting bloc that helped him win the presidency in 2008 and 2012.", + "length": 247 + }, + { + "text": "'The president’s promise isn’t about making the best policy or enforcing the law -- it’s an admission that 
his pledge to not uphold the law in the future would be bad for his party now,' said Senate Republican leader Mitch McConnell in a statement.", + "length": 254 + }, + { + "text": "'It's clear that anybody that looks at this says, ''Wow, we weren't the priority we should have been and we weren't the priority he promised we'd be'',' Gutierrez said in an interview before the speech, while expressing hope that Obama's Thursday remarks would be a positive sign.", + "length": 280 + }, + { + "text": "' Republicans in the House of Representatives blocked bipartisan immigration legislation passed by the Senate in 2013, and the Latino community has demanded Obama deliver on a promise to use his executive authority to ease deportations of some of the more than 11 million undocumented people in the country.", + "length": 307 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6362687349319458 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:39.240380824Z", + "first_section_created": "2025-12-23T09:33:39.240687637Z", + "last_section_published": "2025-12-23T09:33:39.240933147Z", + "all_results_received": "2025-12-23T09:33:39.29887737Z", + "output_generated": "2025-12-23T09:33:39.299063878Z", + "total_processing_time_ms": 58, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 57, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:39.240687637Z", + "publish_time": "2025-12-23T09:33:39.240933147Z", + "first_worker_start": "2025-12-23T09:33:39.241502371Z", + "last_worker_end": "2025-12-23T09:33:39.298004Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:39.241535472Z", + "start_time": "2025-12-23T09:33:39.241613376Z", + "end_time": "2025-12-23T09:33:39.241703179Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": 
"sentiment", + "receive_time": "2025-12-23T09:33:39.241732Z", + "start_time": "2025-12-23T09:33:39.241894Z", + "end_time": "2025-12-23T09:33:39.298004Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:39.241431368Z", + "start_time": "2025-12-23T09:33:39.241503371Z", + "end_time": "2025-12-23T09:33:39.241586575Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:39.241422768Z", + "start_time": "2025-12-23T09:33:39.241502371Z", + "end_time": "2025-12-23T09:33:39.241533372Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3811, + "slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/00304598f909139cdfbb82c283ff2ba59f64c459.json 
b/data/output/00304598f909139cdfbb82c283ff2ba59f64c459.json new file mode 100644 index 0000000..c778535 --- /dev/null +++ b/data/output/00304598f909139cdfbb82c283ff2ba59f64c459.json @@ -0,0 +1,230 @@ +{ + "file_name": "00304598f909139cdfbb82c283ff2ba59f64c459.txt", + "total_words": 401, + "top_n_words": [ + { + "word": "the", + "count": 25 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "bellingham", + "count": 8 + }, + { + "word": "her", + "count": 8 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "for", + "count": 7 + }, + { + "word": "to", + "count": 7 + }, + { + "word": "britain", + "count": 6 + }, + { + "word": "mrs", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "So she really will be hugely missed.", + "length": 36 + }, + { + "text": "The combined number of likes and shares reached more than 7,000.", + "length": 64 + }, + { + "text": "’ The heir to the throne added that he was ‘greatly saddened’ by her death.", + "length": 81 + }, + { + "text": "Prince Charles described Mrs Bellingham as a ‘wonderful ambassador for Prime’.", + "length": 82 + }, + { + "text": "The organisation is now part of Prince Charles’s charity Business In The Community.", + "length": 85 + }, + { + "text": "He made the comments at the launch of a report that found Britain must make better use of older workers.", + "length": 104 + }, + { + "text": "A Far Right group used pictures of former television star Lynda Bellingham to attract visitors to its website.", + "length": 110 + }, + { + "text": "She died in the arms of her husband Michael Pattemore, after the disease spread from her colon to other parts of her body.", + "length": 122 + }, + { + "text": "Yesterday, Prince Charles said: ‘She was not only a marvellous actress but also someone who worked tirelessly for others.", + "length": 123 + }, + { + "text": "Posts on the page’s Facebook site include comments such as calls for British Muslims to be ‘wiped out’ and all 
non-whites deported.", + "length": 137 + }, + { + "text": "Facebook users clicking 'like' on the pictures were not aware that they had been posted by Britain First, a splinter group of the British National Party.", + "length": 153 + }, + { + "text": "Her final appearance on Loose Women, which aired on Wednesday, attracted two million views – double the television programme’s normal viewing figures.", + "length": 154 + }, + { + "text": "Mrs Bellingham, who starred in the OXO adverts and was a panellist on television programme Loose Women, died on Sunday at the age of 66 after a battle with colon cancer.", + "length": 169 + }, + { + "text": "The Facebook sharing of the picture of Mrs Bellingham, with All Creatures Great and Small co-star Christopher Timothy, meant that Britain First’s website attracted hundreds more views.", + "length": 186 + }, + { + "text": "For the final two years of her life, Mrs Bellingham had been an ambassador for the Prince’s Initiative for Mature Enterprise (Prime) which helps the over-50s start their own companies.", + "length": 186 + }, + { + "text": "Online post: The Facebook sharing of the picture of Mrs Bellingham, with All Creatures Great and Small co-star Christopher Timothy, meant that Britain First's website attracted hundreds more views .", + "length": 198 + }, + { + "text": "Britain First, a group which promotes fascist views with the slogan ‘Taking our country back’, posted pictures of Mrs Bellingham hours after her death and tricked users to like and share the pictures.", + "length": 204 + }, + { + "text": "Praise: Prince Charles (left) said Lynda Bellingham (right) was 'not only a marvellous actress but also someone who worked tirelessly for others' Since her death, people across Britain have paid tribute to the former actress.", + "length": 225 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7073459029197693 + }, + "name_replaced": false, + "performance": { + "file_detection_time": 
"2025-12-23T09:33:39.741757085Z", + "first_section_created": "2025-12-23T09:33:39.7421096Z", + "last_section_published": "2025-12-23T09:33:39.74234811Z", + "all_results_received": "2025-12-23T09:33:39.801800296Z", + "output_generated": "2025-12-23T09:33:39.801986503Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:39.7421096Z", + "publish_time": "2025-12-23T09:33:39.74234811Z", + "first_worker_start": "2025-12-23T09:33:39.743031839Z", + "last_worker_end": "2025-12-23T09:33:39.800884Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:39.743042939Z", + "start_time": "2025-12-23T09:33:39.743096841Z", + "end_time": "2025-12-23T09:33:39.743135543Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:39.743283Z", + "start_time": "2025-12-23T09:33:39.743421Z", + "end_time": "2025-12-23T09:33:39.800884Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:39.743017138Z", + "start_time": "2025-12-23T09:33:39.743096841Z", + "end_time": "2025-12-23T09:33:39.743167744Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:39.742960236Z", + "start_time": "2025-12-23T09:33:39.743031839Z", + "end_time": "2025-12-23T09:33:39.74305454Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2435, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/0030ea36432cd0a5b36484bb603b3755d2fd0e70.json b/data/output/0030ea36432cd0a5b36484bb603b3755d2fd0e70.json new file mode 100644 index 0000000..9b788d6 --- /dev/null +++ b/data/output/0030ea36432cd0a5b36484bb603b3755d2fd0e70.json @@ -0,0 +1,334 @@ +{ + "file_name": "0030ea36432cd0a5b36484bb603b3755d2fd0e70.txt", + "total_words": 712, + "top_n_words": [ + { + "word": "the", + "count": 58 + }, + { + "word": "a", + "count": 26 + }, + { + "word": "of", + "count": 25 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "road", + "count": 12 + }, + { + "word": "that", + "count": 12 + }, + { + "word": "are", + "count": 11 + }, + { + "word": "were", + "count": 11 + }, + { + "word": "in", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "'At 9.", + "length": 6 + }, + { + "text": "More to come.", + "length": 13 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "You do 100km/h down that road.", + "length": 30 + }, + { + "text": "'This has come out 
of left field.", + "length": 33 + }, + { + "text": "Each had been shot multiple times.", + "length": 34 + }, + { + "text": "She assumed the motorist was helping out.", + "length": 41 + }, + { + "text": "'Police have now established a crime scene.", + "length": 43 + }, + { + "text": "'We are a quiet little New South Wales town.", + "length": 44 + }, + { + "text": "'I drove past thinking they had a punctured tyre.", + "length": 49 + }, + { + "text": "' Shots were reportedly heard before the bodies were found.", + "length": 59 + }, + { + "text": "20am on Wednesday and noticed a white car pulled off the road.", + "length": 62 + }, + { + "text": "Resident Kylie Spencer said she was driving into town about 9.", + "length": 62 + }, + { + "text": "'It was just pulled over on the road, nothing suspicious at all.", + "length": 64 + }, + { + "text": "' The mayor did confirm that there is a bikie presence in the town.", + "length": 67 + }, + { + "text": "' A man and a woman have been found dead in the NSW town of Moama .", + "length": 67 + }, + { + "text": "'When the paramedics arrived they confirmed two people were deceased.", + "length": 69 + }, + { + "text": "'Police are heavily involved and treating the case with the greatest urgency.", + "length": 77 + }, + { + "text": "Ms Spencer, a resident of 12 years, said the cars weren't local to that road.", + "length": 77 + }, + { + "text": "The Riverine Herald also reports that an abandoned car was seen by residents.", + "length": 77 + }, + { + "text": "' Ms Spencer said there was another car pulled over next to the white vehicle.", + "length": 78 + }, + { + "text": "The sooner the perpetrator or perpetrators are brought into custody the better.", + "length": 79 + }, + { + "text": "This is a very safe place and we don't by and large have any criminal activity.", + "length": 79 + }, + { + "text": "'I didn't know there were bodies,' Ms Spencer, who lives on the road, told AAP.", + "length": 79 + }, + { + "text": "The man was inside 
the vehicle and the woman was in the process of exiting the vehicle .", + "length": 88 + }, + { + "text": "The town is in the Riverina region, 220 kilometres north of Melbourne and west of Albury.", + "length": 89 + }, + { + "text": "The bodies of a man and a woman were found in a white sedan on the side of the road (pictured).", + "length": 95 + }, + { + "text": "The man was found dead inside of a white sedan, whilst the woman was positioned halfway out of the vehicle.", + "length": 107 + }, + { + "text": "The bodies were discovered by a local resident on a bush track four kilometres outside of the Moama township.", + "length": 109 + }, + { + "text": "' Councillor Weyrich said that he and the community are shocked, as nothing like this has ever happened before.", + "length": 111 + }, + { + "text": "'I am extremely concerned, the people of Moama need to be very, very careful who they talk to and communicate with.", + "length": 115 + }, + { + "text": "Emergency services, including police and paramedics, were called to the scene at Old Deniliquin Road on Tuesday morning.", + "length": 120 + }, + { + "text": "'We do have a Bandidos clubhouse across the river and a member that lives close by, but I am not saying that they are involved.", + "length": 127 + }, + { + "text": "Police have cordoned the street off after the double murder of a man and woman on the side of the road in NSW border town Moama .", + "length": 129 + }, + { + "text": "'My greatest concern is that we have somebody walking around who is armed and is capable of multiple homicides,' said Mr Weyrich.", + "length": 129 + }, + { + "text": "In a media conference, police said that the victims are yet to be identified but both lived locally and 'obviously knew each other'.", + "length": 132 + }, + { + "text": "Residents of Old Deniliquin Road are not able to return to their homes after police cordoned off the street to establish a crime scene.", + "length": 135 + }, + { + "text": "The Mayor of the town, 
Thomas Weyrich told Daily Mail Australia that he is greatly concerned about the safety of the residents in Moama.", + "length": 136 + }, + { + "text": "NSW Police media confirmed that the deaths are being treated as suspicious and urged members of the public to come forward with any information.", + "length": 144 + }, + { + "text": "Police from Deniliquin Local Area Command, with the assistance of detectives from State Crime Commands Homicide Squad are investigating the deaths.", + "length": 147 + }, + { + "text": "26am paramedics were initially called out to what we were told was a car accident,' a Victoria Rural Ambulance Service spokesperson told Daily Mail Australia.", + "length": 158 + }, + { + "text": "Police are hunting for a gunman after the shooting murders of a man and woman, whose bodies were discovered on the side of a road in the NSW border town Moama.", + "length": 159 + }, + { + "text": "NSW police have established Strike Force Kennedia to investigate their deaths and detectives have been sent to the town to locate the person or people responsible.", + "length": 163 + }, + { + "text": "'This crime has occurred in your community and we are sure that people have knowledge of what happened and why,' said Superintendent Paul Condon of Deniliquin Local Area Command.", + "length": 178 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5484532117843628 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:40.243131647Z", + "first_section_created": "2025-12-23T09:33:40.244379099Z", + "last_section_published": "2025-12-23T09:33:40.24464291Z", + "all_results_received": "2025-12-23T09:33:40.305333947Z", + "output_generated": "2025-12-23T09:33:40.305526955Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:40.244379099Z", + "publish_time": 
"2025-12-23T09:33:40.24464291Z", + "first_worker_start": "2025-12-23T09:33:40.245036226Z", + "last_worker_end": "2025-12-23T09:33:40.304445Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:40.244970323Z", + "start_time": "2025-12-23T09:33:40.245036226Z", + "end_time": "2025-12-23T09:33:40.245111329Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:40.245277Z", + "start_time": "2025-12-23T09:33:40.245412Z", + "end_time": "2025-12-23T09:33:40.304445Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:40.244963823Z", + "start_time": "2025-12-23T09:33:40.245047527Z", + "end_time": "2025-12-23T09:33:40.245152131Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:40.245026626Z", + "start_time": "2025-12-23T09:33:40.245096129Z", + "end_time": "2025-12-23T09:33:40.245157031Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4017, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/003102fe2264b62782047b239ac3fac14cd10a94.json b/data/output/003102fe2264b62782047b239ac3fac14cd10a94.json new file mode 100644 index 0000000..25965bd --- /dev/null +++ b/data/output/003102fe2264b62782047b239ac3fac14cd10a94.json @@ -0,0 +1,424 @@ +{ + "file_name": "003102fe2264b62782047b239ac3fac14cd10a94.txt", + "total_words": 1219, + "top_n_words": [ + { + "word": "the", + "count": 91 + }, + { + "word": "of", + "count": 40 + }, + { + "word": "a", + "count": 37 + }, + { + "word": "and", + "count": 31 + }, + { + "word": "to", + "count": 29 + }, + { + "word": "in", + "count": 24 + }, + { + "word": "bones", + "count": 20 + }, + { + "word": "sea", + "count": 20 + }, + { + "word": "mammoth", + "count": 19 + }, + { + "word": "that", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "Found!", + "length": 6 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "'It took a couple of trips.", + "length": 27 + }, + { + "text": "4 metre) tall woolly mammoth.", + "length": 29 + }, + { + "text": "Its impressive tusks are pictured .", + "length": 35 + }, + { + "text": "Here, an expert cleans mammoth molars .", + "length": 39 + }, + { + "text": "'But it's quite common to find something.", + "length": 41 + }, + { + "text": "'They often dump the bones back in the sea'.", + "length": 44 + }, + { + "text": "An illustration of the Ice Age giant is pictured .", + "length": 50 + }, + { + "text": "' Here, a salvager holds part of a mammoth's lower jaw .", + "length": 56 + }, + { + "text": "'It doesn't look like much, 
but then we had it carbon dated.", + "length": 60 + }, + { + "text": "The mammoth would have eaten vegetation on the low-lying tundra.", + "length": 64 + }, + { + "text": "'It proves the animal was there much longer than thought,' he said.", + "length": 67 + }, + { + "text": "They shared the land with bison and wolves, as well as other megafauna.", + "length": 71 + }, + { + "text": "The land mass altered according to climate changes from dry and icy to marsh.", + "length": 77 + }, + { + "text": "Roaming the tundra: The mammoth would have eaten vegetation on the low-lying tundra.", + "length": 84 + }, + { + "text": "Mr Broch said it is 'extremely rare' to find mammoth skulls and large bones on the seabed.", + "length": 90 + }, + { + "text": "'It's not as impressive [looking] as the mammoth, but it's more scientifically interesting.", + "length": 91 + }, + { + "text": "‘Sometimes we charter a boat of our own and go for special “fossil hunting” expeditions.", + "length": 94 + }, + { + "text": "A vast tundra stretched between Britain and Europe in an ancient landmass known as Doggerland.", + "length": 94 + }, + { + "text": "To cope with such conditions, mammoths had six inches of fat and fur as long as three feet in places.", + "length": 101 + }, + { + "text": "We started with the skull and then found little ones to replace those that were missing,' he explained.", + "length": 103 + }, + { + "text": "In the spring, the ice melted a little, revealing rich pastures that attracted herds of grazing animals.", + "length": 104 + }, + { + "text": "The place to be: The bones were pulled from the sea near the port of Rotterdam in the Netherlands (mapped).", + "length": 107 + }, + { + "text": "When the creature was alive 40,000 years ago, the now watery expanse was a low-lying stretch of icy tundra.", + "length": 107 + }, + { + "text": "They have also found parts of sabre-toothed tigers, the skull of a woolly rhino and the cranium of a reindeer.", + "length": 110 + }, + { + 
"text": "It looked a bit like northern Siberia, with green shoots poking though snow in winter, when it was cold and barren.", + "length": 115 + }, + { + "text": "Mr Broch said: ‘Most weeks we go to the fishing ports to meet the fishing vessels and buy the fossils they caught.", + "length": 116 + }, + { + "text": "‘We have assembled a number of complete skeletons of mammoths, something very few companies in the world can do’.", + "length": 117 + }, + { + "text": "Because of the low sea levels, antelope and ancient deer, for example, could migrate from what is now mainland Europe.", + "length": 118 + }, + { + "text": "At the time of the last glacial maximum, 18,000 years ago, sea levels were 340ft (120 metres) lower than they are today.", + "length": 120 + }, + { + "text": "'When they died, their skeletons remained there so it's quite common to find extinct bones in the North Sea,' he explained.", + "length": 123 + }, + { + "text": "Here, amateur palaeontologist Klaas Post and mammoth expert Dick Mol examine bones collected on a boat trip in the North Sea .", + "length": 126 + }, + { + "text": "He explained that his fisherman father-in-law started collecting these bones at a young age and has amassed a large collection.", + "length": 127 + }, + { + "text": "Dutch fossil hunters have pulled 40,000-year-old bones (pictured) from the depths of the North Sea to create a complete skeleton .", + "length": 130 + }, + { + "text": "Most fishermen have found extinct bones [near Rotterdam] by dredging and they're more likely to find something than not,' he said.", + "length": 130 + }, + { + "text": "A rare discovery: Markus Broch, who works at North Sea Fossils, said it is 'extremely rare' to find a complete mammoth skeleton on the seabed.", + "length": 142 + }, + { + "text": "The group of archaeologists, salvagers and palaeontologists trawled the waters off the east coast of Rotterdam at a depth of 100 feet (30 metres).", + "length": 146 + }, + { + "text": "Mr Broch explained 
that lots are found because large ships leaving and entering the bustling port disturb the sea bed, making bones easier to find .", + "length": 148 + }, + { + "text": "The North Sea may seem a surprising location to discover a woolly mammoth skeleton, but Dutch fossil hunters have hauled ancient bones from its depths.", + "length": 151 + }, + { + "text": "' The bone is 20,000 years old, making it one of the youngest found in the area, causing experts to re-think when the creature became extinct in Europe.", + "length": 152 + }, + { + "text": "They also uncovered bones belonging to woolly rhinos and Irish elks, plus a prehistoric skull of a European bison, also known as a Wisent on the North Sea bed.", + "length": 159 + }, + { + "text": "‘Because we see so many fossils we work very closely with the leading experts in the field, such as Dick Mol, who is the world's leading authority on mammoths.", + "length": 161 + }, + { + "text": "The North Sea didn’t exist at all and the Thames was a tributary of the ancient German Rhine River, which ran down the middle of what is now the English Channel.", + "length": 163 + }, + { + "text": "' Large clean-up operation: The salvagers managed to piece together the entire mammoth skeleton after initially discovering the skull and tusks of the animal in 2010.", + "length": 166 + }, + { + "text": "'In the Ice Age, lots of water from the sea became great ice sheets and the land between the Netherlands and the UK was joined up, so megafauna could just walk around.", + "length": 167 + }, + { + "text": "Employees of the company, based in Urk in the Netherlands, search for remains of extinct animals in the dark depths and in 2010 discovered bones belonging to an 11ft (3.", + "length": 169 + }, + { + "text": "While Mr Broch told MailOnline that the mammoth is perhaps the team's most impressive discovery, they found a 'wonderful fragment of lower jawbone from a sabre-tooth tiger.", + "length": 172 + }, + { + "text": "Another bid discovery: 
The team also found bones belonging to woolly rhinos and Irish elks, plus a prehistoric skull of a European bison (pictured), also known as a Wisent on the North Sea bed .", + "length": 194 + }, + { + "text": "During the Ice Age, when mammoth roamed the Earth, lots of water that now makes up seas and oceans, was locked up in glaciers and huge sheets of ice, so sea levels were lower than they are today.", + "length": 195 + }, + { + "text": "They collected its skull, tusks and other large bones, and filled in any missing ones with finds from similar beasts discovered nearby of a similar age, to form a complete skeleton after months of work.", + "length": 202 + }, + { + "text": "Carbon dating tests revealed that some of the bones belonged to a mammoth that roamed Doggerland - a former landmass in the southern North Sea that connected Great Britain to mainland Europe - around 40,000 years ago.", + "length": 217 + }, + { + "text": "Markus Broch, who works at North Sea Fossils - the firm that found the mammoth - told MailOnline that the sea there is relatively shallow and that bones are revealed by currents created by large ships entering the busy port.", + "length": 224 + }, + { + "text": "when mammoths roamed the Earth, much like parts of SIberia today (pictured) Catch of the day: The salvagers managed to piece together the entire mammoth skeleton after initially discovering the skull (pictured) and tusks of the animal in 2010 .", + "length": 244 + }, + { + "text": "Something to chew over: Mr Broch told MailOnline: 'Most fishermen have found extinct bones [near Rotterdam] by using dredging and they're more likely to find something than not,' before adding that discovering large mammoth bones are 'extremely rare.", + "length": 250 + }, + { + "text": "Salvagers collected the mammoth's skull (pictured left), tusks and other large bones from the bottom of North Sea, which 40,000 years ago would have been an icy tundra, connecting Britain to Europe (pictured right) Ice Age 
find: Carbon dating tests revealed that some of the bones belonged to a mammoth that roamed Doggerland - a former landmass in the southern North Sea that connected Great Britain to mainland Europe - around 40,000 years ago.", + "length": 446 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.41865648329257965 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:40.745470348Z", + "first_section_created": "2025-12-23T09:33:40.745860164Z", + "last_section_published": "2025-12-23T09:33:40.746282682Z", + "all_results_received": "2025-12-23T09:33:40.837764207Z", + "output_generated": "2025-12-23T09:33:40.837963115Z", + "total_processing_time_ms": 92, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 91, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:40.745860164Z", + "publish_time": "2025-12-23T09:33:40.746089274Z", + "first_worker_start": "2025-12-23T09:33:40.746679399Z", + "last_worker_end": "2025-12-23T09:33:40.827659Z", + "total_journey_time_ms": 81, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:40.7467024Z", + "start_time": "2025-12-23T09:33:40.746775103Z", + "end_time": "2025-12-23T09:33:40.746893508Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:40.746997Z", + "start_time": "2025-12-23T09:33:40.747144Z", + "end_time": "2025-12-23T09:33:40.827659Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 80 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:40.746662998Z", + "start_time": "2025-12-23T09:33:40.746728001Z", + "end_time": "2025-12-23T09:33:40.746820805Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:40.746617096Z", + "start_time": 
"2025-12-23T09:33:40.746679399Z", + "end_time": "2025-12-23T09:33:40.746725101Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:40.746182178Z", + "publish_time": "2025-12-23T09:33:40.746282682Z", + "first_worker_start": "2025-12-23T09:33:40.746828505Z", + "last_worker_end": "2025-12-23T09:33:40.836867Z", + "total_journey_time_ms": 90, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:40.746800504Z", + "start_time": "2025-12-23T09:33:40.746831205Z", + "end_time": "2025-12-23T09:33:40.746865406Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:40.747033Z", + "start_time": "2025-12-23T09:33:40.747446Z", + "end_time": "2025-12-23T09:33:40.836867Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 89 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:40.746862006Z", + "start_time": "2025-12-23T09:33:40.746901008Z", + "end_time": "2025-12-23T09:33:40.74695171Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:40.746762002Z", + "start_time": "2025-12-23T09:33:40.746828505Z", + "end_time": "2025-12-23T09:33:40.746862906Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 169, + "min_processing_ms": 80, + "max_processing_ms": 89, + "avg_processing_ms": 84, + "median_processing_ms": 89, + "total_queue_wait_ms": 2, + 
"avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3491, + "slowest_section_id": 1, + "slowest_section_time_ms": 90 + } +} diff --git a/data/output/0031683fd329729020c9b2200e5110d766eacc3e.json b/data/output/0031683fd329729020c9b2200e5110d766eacc3e.json new file mode 100644 index 0000000..6044388 --- /dev/null +++ b/data/output/0031683fd329729020c9b2200e5110d766eacc3e.json @@ -0,0 +1,266 @@ +{ + "file_name": "0031683fd329729020c9b2200e5110d766eacc3e.txt", + "total_words": 564, + "top_n_words": [ + { + "word": "the", + "count": 44 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "london", + "count": 9 + }, + { + "word": "were", + "count": 9 + }, + { + "word": "buses", + "count": 8 + }, + { + "word": "s", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Mia De Graaf .", + "length": 14 + }, + { + "text": "Dwindling in numbers, the original Routemaster is almost a thing of the past.", + "length": 77 + }, + { + "text": "Crowds lined the paths in the sun this morning watch the old double-deckers parade in convoy.", + "length": 93 + }, + { + "text": "Green Routemasters worked for London Transport's country bus division to destinations including Windsor.", + "length": 104 + }, + { + "text": "Though it was three tonnes 
lighter than the chunky 56-seat trolleybuses, the final design managed to seat 64.", + "length": 109 + }, + { + "text": "With the Routemaster's longevity, examples were painted to celebrate both the Queen's Silver and Golden Jubilees.", + "length": 113 + }, + { + "text": "But nostalgic Londoners are being treated to a throwback this weekend with a fleet of the iconic buses on display.", + "length": 114 + }, + { + "text": "The first prototype was developed between 1947 and 1956, with a brief to create a lighter, fuel-efficient vehicle.", + "length": 114 + }, + { + "text": "Though some have been repainted in the years since they were used, many retain the adverts and posters of the time.", + "length": 115 + }, + { + "text": "In 1977, 25 Routemasters were painted silver to celebrate the Silver Jubilee and, in 2002, 50 buses were painted gold.", + "length": 118 + }, + { + "text": "Revolutionary: Though it was three tonnes lighter than the chunky 56-seat trolleybuses, the final design managed to seat 64 .", + "length": 125 + }, + { + "text": "'Perfect design': The first prototype was developed between 1947 and 1956, with a brief to create a lighter, fuel-efficient vehicle .", + "length": 133 + }, + { + "text": "Through the ages: With badges from the 1950s up to an Olympic-themed 2012 badge, this conductor was decked out in Routemaster memorabilia .", + "length": 139 + }, + { + "text": "And the nostalgia thoroughfare was completed with old film reels projected on TV screens, memorabilia stalls, and old fashioned food stands.", + "length": 140 + }, + { + "text": "RM1 vehicles from as far afield as Scotland, Germany and France have been called home to join the fleet, which will take another tour tomorrow.", + "length": 143 + }, + { + "text": "Colour code: Green Routemasters worked for London Transport's country bus division, which went from the edges of London out to places including Windsor .", + "length": 153 + }, + { + "text": "Celebrations: Tomorrow the bespoke fleet 
will parade again in a 'through the decades' special, set to draw in hundreds of fanatics from around the world .", + "length": 154 + }, + { + "text": "Symbol of Britain: Although there are only two heritage routes that still run the original buses in London, they remain a worldwide symbol for the nation .", + "length": 155 + }, + { + "text": "Enthusiasts: Conductors and enthusiasts arrived at the event bearing vintage badges to honour the landmark occasion, which has been years in the planning .", + "length": 155 + }, + { + "text": "Iconic: Buses that have been sent to France, Scotland and Germany for other uses since they stopped transporting Londoners were recalled for the convention .", + "length": 157 + }, + { + "text": "A thing of the past: The buses remained on London's streets until 2005, when bendy buses and new Routemasters were developed - though two routes still use them .", + "length": 161 + }, + { + "text": "Old fashioned: More than 2,000 Routemasters were built over the decades but now only 1,000 still exist, so this scene of one driving through London is a rarity .", + "length": 161 + }, + { + "text": "Nostalgia: Crowds clamoured to photograph a rare gathering of the first 130 Routemaster buses in London's Finsbury Park to mark the 60th anniversary of the fleet .", + "length": 163 + }, + { + "text": "Original: This rickety bus was used in London in the 1920s before trolleybuses were invented - a chunky version of the open-backed Routemasters that came in in 1954 .", + "length": 166 + }, + { + "text": "Crowds: After a parade around Finsbury Park, in north London, the fleet lined up for spectators to get a closer look, while vintage film reels were projected on screens .", + "length": 170 + }, + { + "text": "Celebrating the Routemaster Association's Diamond Jubilee Celebration, around 130 buses drove through the capital's Finsbury Park today - including the first ones ever registered.", + "length": 179 + } + ], + "sentiment": { + "sentiment": 
"positive", + "score": 0.44976112246513367 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:41.247508237Z", + "first_section_created": "2025-12-23T09:33:41.249152006Z", + "last_section_published": "2025-12-23T09:33:41.249340914Z", + "all_results_received": "2025-12-23T09:33:41.311546714Z", + "output_generated": "2025-12-23T09:33:41.311749423Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:41.249152006Z", + "publish_time": "2025-12-23T09:33:41.249340914Z", + "first_worker_start": "2025-12-23T09:33:41.24997484Z", + "last_worker_end": "2025-12-23T09:33:41.31062Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:41.249984641Z", + "start_time": "2025-12-23T09:33:41.250057944Z", + "end_time": "2025-12-23T09:33:41.250128947Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:41.2502Z", + "start_time": "2025-12-23T09:33:41.250351Z", + "end_time": "2025-12-23T09:33:41.31062Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:41.249906837Z", + "start_time": "2025-12-23T09:33:41.24997484Z", + "end_time": "2025-12-23T09:33:41.250050643Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:41.24996784Z", + "start_time": "2025-12-23T09:33:41.250031943Z", + "end_time": "2025-12-23T09:33:41.250061744Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 
0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3455, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/00317217579a48595004e7ea2f0909033c187f0c.json b/data/output/00317217579a48595004e7ea2f0909033c187f0c.json new file mode 100644 index 0000000..30590e9 --- /dev/null +++ b/data/output/00317217579a48595004e7ea2f0909033c187f0c.json @@ -0,0 +1,218 @@ +{ + "file_name": "00317217579a48595004e7ea2f0909033c187f0c.txt", + "total_words": 279, + "top_n_words": [ + { + "word": "in", + "count": 15 + }, + { + "word": "the", + "count": 15 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "to", + "count": 9 + }, + { + "word": "and", + "count": 6 + }, + { + "word": "victoria", + "count": 6 + }, + { + "word": "heat", + "count": 5 + }, + { + "word": "on", + "count": 5 + }, + { + "word": "children", + "count": 4 + }, + { + "word": "have", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "No excuses.", + "length": 11 + }, + { + "text": "No exceptions.", + "length": 14 + }, + { + "text": "'It is never safe to leave 
anyone in cars.", + "length": 42 + }, + { + "text": "All four happened in the 24 hours leading into early Saturday.", + "length": 62 + }, + { + "text": "They're also urged to check on elderly relative, friends and neighbours to ensure they're okay.", + "length": 95 + }, + { + "text": "Many involved people collapsing in public or becoming exhausted after working in the heat of the day.", + "length": 101 + }, + { + "text": "In one, three children all aged under five, were left alone in a Geelong carpark - southwest of Melbourne.", + "length": 106 + }, + { + "text": "There have been four incidents of children left in locked cars in Victoria as the temperatures topped 30C degrees.", + "length": 114 + }, + { + "text": "There have been four incidents of children left in locked cars in Victoria as the temperatures topped 30C degrees .", + "length": 115 + }, + { + "text": "Paramedics have also responded to 18 heat-related incidents - 11 involving people aged over 65 - and 15 people needed hospital treatment.", + "length": 137 + }, + { + "text": "' Raging bushfires have been burning in Victoria since Friday, destroying one home in Moyston at the foothills of the Grampians in western Victoria.", + "length": 148 + }, + { + "text": "Meanwhile, a 21-year-old St Albans man drowned at Jan Juc beach, south of Geelong, on Friday as many tried to escape the heat with a dip in the sea.", + "length": 148 + }, + { + "text": "Temperatures in regional Victoria are predicted to top 40C on Saturday and locals are urged to keep out of the sun and to drink plenty of water through the day.", + "length": 160 + }, + { + "text": "'We know that consecutive days of heat with warm nights like last night has an effect on the way that our bodies can recover, especially the infirm and the elderly,' Mr Holman added.", + "length": 182 + }, + { + "text": "With even hotter conditions expected on Saturday, Ambulance Victoria State Health Commander Paul Holman has urged Victorians to better protect 
themselves and, particularly, children from the heat.", + "length": 196 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5417035222053528 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:41.750136351Z", + "first_section_created": "2025-12-23T09:33:41.751947426Z", + "last_section_published": "2025-12-23T09:33:41.752115833Z", + "all_results_received": "2025-12-23T09:33:41.81469505Z", + "output_generated": "2025-12-23T09:33:41.814835055Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:41.751947426Z", + "publish_time": "2025-12-23T09:33:41.752115833Z", + "first_worker_start": "2025-12-23T09:33:41.752592753Z", + "last_worker_end": "2025-12-23T09:33:41.813825Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:41.752628655Z", + "start_time": "2025-12-23T09:33:41.752695458Z", + "end_time": "2025-12-23T09:33:41.752732059Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:41.752888Z", + "start_time": "2025-12-23T09:33:41.753023Z", + "end_time": "2025-12-23T09:33:41.813825Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:41.752590753Z", + "start_time": "2025-12-23T09:33:41.752648256Z", + "end_time": "2025-12-23T09:33:41.752700658Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:41.752541751Z", + "start_time": "2025-12-23T09:33:41.752592753Z", + "end_time": "2025-12-23T09:33:41.752616554Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + 
"sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1644, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/0031c3dfc04e4cf8428e1eca7beb2fdb7bf6cd4d.json b/data/output/0031c3dfc04e4cf8428e1eca7beb2fdb7bf6cd4d.json new file mode 100644 index 0000000..0cbb464 --- /dev/null +++ b/data/output/0031c3dfc04e4cf8428e1eca7beb2fdb7bf6cd4d.json @@ -0,0 +1,346 @@ +{ + "file_name": "0031c3dfc04e4cf8428e1eca7beb2fdb7bf6cd4d.txt", + "total_words": 574, + "top_n_words": [ + { + "word": "the", + "count": 37 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "mr", + "count": 12 + }, + { + "word": "school", + "count": 11 + }, + { + "word": "for", + "count": 10 + }, + { + "word": "by", + "count": 9 + }, + { + "word": "education", + "count": 9 + }, + { + "word": "gove", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "A 
.", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Lib .", + "length": 5 + }, + { + "text": "issue.", + "length": 6 + }, + { + "text": "places.", + "length": 7 + }, + { + "text": "places.", + "length": 7 + }, + { + "text": "‘On top of .", + "length": 14 + }, + { + "text": "For his part, .", + "length": 15 + }, + { + "text": "The money from .", + "length": 16 + }, + { + "text": "free schools budget.", + "length": 20 + }, + { + "text": "places in areas of need.", + "length": 24 + }, + { + "text": "The Lib Dems, who backed free .", + "length": 31 + }, + { + "text": "brief their versions to the media.", + "length": 34 + }, + { + "text": "Jason Groves, Deputy Political Editor .", + "length": 39 + }, + { + "text": "Former education aide Dominic Cummings .", + "length": 40 + }, + { + "text": "’ The department split is so severe that .", + "length": 44 + }, + { + "text": "£400million transfer by Mr Gove at the end of last year.", + "length": 57 + }, + { + "text": "rise by more than £200million a year,’ the spokesman said.", + "length": 61 + }, + { + "text": "branded Mr Clegg ‘self-obsessed, dishonest and revolting’.", + "length": 62 + }, + { + "text": "‘From 2015, funding to councils for new school places will .", + "length": 62 + }, + { + "text": "’ A spokesman for Mr Gove dismissed the claims, pointing out .", + "length": 64 + }, + { + "text": "revealed thousands of teachers have deserted the party for Labour.", + "length": 66 + }, + { + "text": "‘Michael Gove is so ideologically obsessed with his free school .", + "length": 67 + }, + { + "text": "that the creation of free schools will create thousands of school .", + "length": 67 + }, + { + "text": "It is understood one was vetoed by Mr Laws, leaving both sides to .", + "length": 67 + }, + { + "text": "Dem sources leaked details to friendly newspapers yesterday of the .", + "length": 68 + }, + { + "text": "the Basic Need fund, which helps councils to increase the number of .", + "length": 69 
+ }, + { + "text": "source described the move as ‘nothing short of lunacy’, adding: .", + "length": 69 + }, + { + "text": "the Deputy Prime Minister called Mr Cummings a ‘loopy ideologue’.", + "length": 69 + }, + { + "text": "officials were yesterday unable to give an official statement on the .", + "length": 70 + }, + { + "text": "this, investment in free schools will provide tens of thousands of new .", + "length": 72 + }, + { + "text": "school places, was put towards meeting an £800million overspend in the .", + "length": 73 + }, + { + "text": "schools, have stepped up attacks on Mr Gove and the project after polls .", + "length": 73 + }, + { + "text": "experiment, he’s willing to see children struggle to get suitable school .", + "length": 76 + }, + { + "text": "A major coalition row has erupted over the fate of £400million for extra school places.", + "length": 88 + }, + { + "text": "Lib Dem sources claim the move was opposed by Mr Laws and could jeopardise up to 30,000 new places.", + "length": 99 + }, + { + "text": "Education Secretary Michael Gove has come under fire from the Lib Dems over funding for new school places .", + "length": 107 + }, + { + "text": "A Government source said: ‘The sad thing is we have got a good story to tell on schools and that message is being lost.", + "length": 121 + }, + { + "text": "Labour yesterday said parents would be appalled by ministers fighting each other rather than tackling the challenges facing the education system.", + "length": 145 + }, + { + "text": "’ Other insults in the ongoing clashes included a baseless story that David Cameron’s 17-year marriage was on the rocks, allegedly spread by Lib Dems.", + "length": 154 + }, + { + "text": "Deputy Prime Minister Nick Clegg, pictured with wife Miriam, has been accused of 'lying' about his free school lunches policy by a former aide to Mr Gove .", + "length": 155 + }, + { + "text": "’ David Cameron yesterday dismissed the row, but Downing Street is understood to be 
growing frustrated by the damaging headlines emerging from the department.", + "length": 160 + }, + { + "text": "A Whitehall source said the Department for Education had become ‘completely dysfunctional’ following clashes between Mr Gove and his Lib Dem deputy David Laws.", + "length": 163 + }, + { + "text": "It follows a clash last week when allies of Mr Gove accused Mr Clegg and Mr Laws of ‘lying’ over funding for their own pet project – free school meals for infant school pupils.", + "length": 182 + }, + { + "text": "David Cameron was under pressure to intervene in a bitter row between Michael Gove and Nick Clegg last night, amid warnings it is overshadowing the Government’s record on education.", + "length": 183 + }, + { + "text": "Shadow education secretary Tristram Hunt said: ‘The Education Department is playing party politics between the Conservatives and the Liberal Democrats when they should be focusing on the education of our young people.", + "length": 219 + }, + { + "text": "The latest row saw the Lib Dems brand the Education Secretary an ‘ideologically obsessed zealot’ yesterday after it emerged he had transferred £400million from a fund for providing school places into his free schools project.", + "length": 230 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8759641051292419 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:42.252877369Z", + "first_section_created": "2025-12-23T09:33:42.253171081Z", + "last_section_published": "2025-12-23T09:33:42.253366089Z", + "all_results_received": "2025-12-23T09:33:42.316508629Z", + "output_generated": "2025-12-23T09:33:42.316660236Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:42.253171081Z", + "publish_time": "2025-12-23T09:33:42.253366089Z", + "first_worker_start": 
"2025-12-23T09:33:42.253955814Z", + "last_worker_end": "2025-12-23T09:33:42.314352Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:42.253997816Z", + "start_time": "2025-12-23T09:33:42.254065819Z", + "end_time": "2025-12-23T09:33:42.254137422Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:42.254201Z", + "start_time": "2025-12-23T09:33:42.254358Z", + "end_time": "2025-12-23T09:33:42.314352Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:42.253978415Z", + "start_time": "2025-12-23T09:33:42.254054918Z", + "end_time": "2025-12-23T09:33:42.254144622Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:42.253889111Z", + "start_time": "2025-12-23T09:33:42.253955814Z", + "end_time": "2025-12-23T09:33:42.253996016Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + 
"worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3569, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/0031f262451b4b335dfda3aca9b533d244244e1e.json b/data/output/0031f262451b4b335dfda3aca9b533d244244e1e.json new file mode 100644 index 0000000..8b5c477 --- /dev/null +++ b/data/output/0031f262451b4b335dfda3aca9b533d244244e1e.json @@ -0,0 +1,270 @@ +{ + "file_name": "0031f262451b4b335dfda3aca9b533d244244e1e.txt", + "total_words": 610, + "top_n_words": [ + { + "word": "the", + "count": 30 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "he", + "count": 16 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "was", + "count": 14 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "that", + "count": 10 + }, + { + "word": "his", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "His first stop: Iraq and Jordan.", + "length": 32 + }, + { + "text": "\" For Stanton, that place was Jordan.", + "length": 37 + }, + { + "text": "The stories he heard were different as well.", + "length": 44 + }, + { + "text": "His time there, he admits, was often terrifying.", + "length": 48 + }, + { + "text": "In the absence of immediate danger, there was hope.", + "length": 51 + }, + { + "text": "The day he left, the United States started air strikes.", + "length": 55 + }, + { + "text": "\"It was an abundance of caution it turned out, but it was a sleepless night.", + "length": 76 + }, + { + "text": "Last month, he turned his lens, and poignant interview style, to the world at large.", + "length": 84 + }, + { + "text": "\" It was one of many moments, he admits, when he appreciated what it means to 
feel secure.", + "length": 90 + }, + { + "text": "(CNN) -- Four years ago, Brandon Stanton became New York City's unofficial photo-chronicler.", + "length": 92 + }, + { + "text": "\" 'Pack a bag and get ready to run' The day Stanton landed in Iraq, ISIS captured the Mosul Dam.", + "length": 96 + }, + { + "text": "\"Everybody I talked to had such big dreams, and often such limited opportunities with which to achieve those dreams.", + "length": 116 + }, + { + "text": "And I realized that in the absence of physical security, no other layers of life can really be experienced,\" he says.", + "length": 117 + }, + { + "text": "\"The other thing this trip has made me realize is the depth of ambition, particularly in underdeveloped countries,\" he says.", + "length": 124 + }, + { + "text": "The first people he questioned were Yazidi refugees who had only freshly fled their homes, their families, their lives as a whole.", + "length": 130 + }, + { + "text": "\"I felt I couldn't ask them beyond their present circumstances, because their lives were absolutely consumed by those circumstances.", + "length": 132 + }, + { + "text": "\"That constant uncertainty seeps into your psyche in a way that you can't really pinpoint until you go back to a place that is secure.", + "length": 134 + }, + { + "text": "\"My idea of what constitutes personal tragedy has been expanded a lot just listening to what these people are going through,\" admits Stanton.", + "length": 141 + }, + { + "text": "His trademark -- the micro narratives that accompany each image -- has been imitated in the far reaches of the globe, from Sydney to Khartoum.", + "length": 142 + }, + { + "text": "'My eyes were opened' Though his trip was \"months in the works,\" Stanton didn't anticipate that his arrival in Erbil, Iraq would coincide with that of ISIS.", + "length": 156 + }, + { + "text": "\"I went to this place where all people wanted was a bit of security: to send their kids to school, to start a business, to get 
married and live a normal life.", + "length": 158 + }, + { + "text": "The tenor, though often still heart-breaking (he visited Zaatari Refugee Camp that houses around 80,000 Syrians), demonstrated a lighter side of the human character.", + "length": 165 + }, + { + "text": "\"I really noticed when I landed in Jordan, where the infrastructures was in place and there was no imminent threat, that there was a load lifted from my psyche,\" he says.", + "length": 170 + }, + { + "text": "With his blog, Humans of New York -- which has over eight million followers on social media -- he has captured the heart and soul of the city's multi-national inhabitants.", + "length": 171 + }, + { + "text": "In partnership with the United Nations, he is touring roughly a dozen countries in a bid to raise awareness for the peacekeeping organization's Millennium Development Goals.", + "length": 173 + }, + { + "text": "\"When I was in Dohuk, there was a moment I got a call in the middle of the night from UNICEF telling to pack a bag and get ready to run, because ISIS was shelling the town and had broken through the lines,\" he recalls.", + "length": 218 + }, + { + "text": "\"When you've just abandoned your house, and your family is surrounded by a hostile army, and you don't know if they're going to survive, it's just inappropriate to ask what your happiest memory with your mother is,\" he says.", + "length": 224 + }, + { + "text": "For the Yazidis he approached -- from the student who had to abandon his long sought after Master's degree to flee bombs to the mother whose children can't stop crying for home -- Stanton found himself falter at the prospect of pursuing his traditional line of questioning.", + "length": 273 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4194985628128052 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:42.754127225Z", + "first_section_created": "2025-12-23T09:33:42.755630188Z", + 
"last_section_published": "2025-12-23T09:33:42.755826696Z", + "all_results_received": "2025-12-23T09:33:42.818097199Z", + "output_generated": "2025-12-23T09:33:42.818256706Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:42.755630188Z", + "publish_time": "2025-12-23T09:33:42.755826696Z", + "first_worker_start": "2025-12-23T09:33:42.756504424Z", + "last_worker_end": "2025-12-23T09:33:42.817238Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:42.756458222Z", + "start_time": "2025-12-23T09:33:42.756523425Z", + "end_time": "2025-12-23T09:33:42.756587328Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:42.756649Z", + "start_time": "2025-12-23T09:33:42.756781Z", + "end_time": "2025-12-23T09:33:42.817238Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:42.756511425Z", + "start_time": "2025-12-23T09:33:42.756582328Z", + "end_time": "2025-12-23T09:33:42.756665931Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:42.756437721Z", + "start_time": "2025-12-23T09:33:42.756504424Z", + "end_time": "2025-12-23T09:33:42.756530625Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + 
"total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3474, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/0032962a65bf404388581ef85d501c048eb67987.json b/data/output/0032962a65bf404388581ef85d501c048eb67987.json new file mode 100644 index 0000000..0055a50 --- /dev/null +++ b/data/output/0032962a65bf404388581ef85d501c048eb67987.json @@ -0,0 +1,342 @@ +{ + "file_name": "0032962a65bf404388581ef85d501c048eb67987.txt", + "total_words": 834, + "top_n_words": [ + { + "word": "the", + "count": 59 + }, + { + "word": "and", + "count": 22 + }, + { + "word": "of", + "count": 22 + }, + { + "word": "a", + "count": 18 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "be", + "count": 11 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "at", + "count": 10 + }, + { + "word": "is", + "count": 10 + }, + { + "word": "on", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "Strange days.", + "length": 13 + }, + { + "text": "Rick Santorum.", + "length": 14 + }, + { + "text": "Andrew Cuomo at 6% and Maryland Gov.", + "length": 36 + }, + { + "text": "Elizabeth Warren at 7%, New York Gov.", + "length": 37 + }, + { + "text": "Rick Perry and former Pennsylvania Sen.", + "length": 39 + }, + { + "text": 
"The real news is on the Democratic side.", + "length": 40 + }, + { + "text": "Below Biden are first-term Massachusetts Sen.", + "length": 45 + }, + { + "text": "On the GOP side of the aisle, New Jersey Gov.", + "length": 45 + }, + { + "text": "Opinion: GOP strategy on shutdown courts doom .", + "length": 47 + }, + { + "text": "Clinton's dominance illustrates an interesting dynamic.", + "length": 55 + }, + { + "text": "Conservatives respect structure, order and party brand names.", + "length": 61 + }, + { + "text": "Perhaps most interesting is the second tier of GOP candidates.", + "length": 62 + }, + { + "text": "Obama pressures conservative Republicans over possible shutdown .", + "length": 65 + }, + { + "text": "Paul Ryan, best known as Mitt Romney's vice presidential running mate.", + "length": 70 + }, + { + "text": "The opinions expressed in this commentary are solely those of John Avlon.", + "length": 73 + }, + { + "text": "And then, at the bottom of the barrel, come two 2012 aspirants: Texas Gov.", + "length": 74 + }, + { + "text": "Martin O'Malley -- perhaps the most openly ambitious of the bunch -- at 2%.", + "length": 75 + }, + { + "text": "Chris Christie narrowly leads the fractured field at 17%, one point above Rep.", + "length": 78 + }, + { + "text": "Jeb Bush seems settled in at 10%, despite brand name and legendary brand loyalty.", + "length": 81 + }, + { + "text": "Yes, it is pathetically early to be projecting on the 2016 presidential campaign.", + "length": 81 + }, + { + "text": "In the old days, the previous vice presidential nominee would be the future favorite.", + "length": 85 + }, + { + "text": "Six years ago, she was a far more polarizing figure among Democrats (and independents).", + "length": 87 + }, + { + "text": "Tough and experienced, Clinton is now positioned as a candidate who rivals Obama's 2007 surge.", + "length": 94 + }, + { + "text": "Two Hispanic senate Republicans, Marco Rubio and Ted Cruz, come in next at 9% and 7% 
respectively.", + "length": 98 + }, + { + "text": "Not for nothing was the name Nixon, Bush or Dole on the GOP presidential ticket from 1952 to 2004.", + "length": 98 + }, + { + "text": "But as a snapshot of the underlying dynamics driving the two parties, this new poll is worth a look.", + "length": 100 + }, + { + "text": "Far from being strengthened by their 2012 campaigns, these two candidates seem weakened by the experience.", + "length": 106 + }, + { + "text": "Traditionally, Republicans have always coalesced around the conventional wisdom front-runner for president.", + "length": 107 + }, + { + "text": "(CNN) -- A new CNN poll confirms that we're witnessing a quiet reversal in the character of our two major parties.", + "length": 114 + }, + { + "text": "Warren's strength comes from fascination with the new and represents the growing strength of the liberal base in the party.", + "length": 123 + }, + { + "text": "Next on the list is Rand Paul, the scion of an outsider libertarian movement sparked by his dad's multiple runs for president.", + "length": 126 + }, + { + "text": "Uncle Joe Biden is well liked by the rank and file, but there doesn't seem to be much of a stampede to put him on the top of the ticket.", + "length": 136 + }, + { + "text": "But the compelling and controversial one-time eye doctor is a first-term senator from Kentucky, far from your typical presidential timber.", + "length": 138 + }, + { + "text": "But that doesn't seem to be the case for Ryan, who emerged from the 2012 presidential race arguably damaged by his association with the Romney campaign.", + "length": 152 + }, + { + "text": "So there you have it: Democrats are behaving like Republicans, falling in line behind the big brand name dominating a race that is still three years away.", + "length": 154 + }, + { + "text": "If Clinton does not run for some reason, Democrats will quickly wake up to the awkward fact that they have almost no depth of the bench after two Obama terms.", + 
"length": 158 + }, + { + "text": "She will also be positioned as the candidate of the 51%, compelling to women of all ages and even possibly competitive among Republican women in this incarnation.", + "length": 162 + }, + { + "text": "Rick Perry's \"oops\" heard round the world still resonates while Santorum's strident social conservatism doesn't seem to be taken seriously by 95% of the party faithful.", + "length": 168 + }, + { + "text": "In contrast, Democrats have favored the presidential candidate with the hot hand, rising from obscurity to the White House -- think Jimmy Carter, Bill Clinton and Barack Obama.", + "length": 176 + }, + { + "text": "Hillary Clinton has accumulated a towering 55 percentage point lead over her next closest competitor, Vice President Joe Biden, who is at 10% and doesn't exactly lack name recognition.", + "length": 184 + }, + { + "text": "Today, after her service as secretary of state, she seems more qualified and less polarizing, transcending her association with the culture of wars concurrent with Bubba's two terms in office.", + "length": 192 + }, + { + "text": "Traditionally, the governor of blue state New Jersey wouldn't be on the GOP radar at all, but Christie -- cruising to a landslide re-election -- seems to be the exception to this and other rules.", + "length": 195 + }, + { + "text": "And Republicans are behaving like Democrats, putting forward a fractured field with no clear front-runners but elevating a New Jersey governor, a Wisconsin congressman and a Kentucky senator to the front of the pack.", + "length": 216 + }, + { + "text": "But a fresh-out-of-the-oven CNN presidential poll shows a fractured GOP field of newcomers with no clear front-runner while the Democrats have given an unprecedented lead to a brand name of their own: Hillary Clinton.", + "length": 217 + }, + { + "text": "Predictive capacity hovers somewhere near zero, and time fixated on polls would be productively used thinking about the 2014 midterms or the 
fights over the debt ceiling looming over our divided, dysfunctional Congress.", + "length": 219 + }, + { + "text": "And while successful governors like Cuomo and O'Malley have earned the right to be taken seriously as presidential candidates, the party faithful don't seem to be much interested in buying what they are selling at the moment.", + "length": 225 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.44684210419654846 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:43.256585031Z", + "first_section_created": "2025-12-23T09:33:43.258163297Z", + "last_section_published": "2025-12-23T09:33:43.25845381Z", + "all_results_received": "2025-12-23T09:33:43.318669027Z", + "output_generated": "2025-12-23T09:33:43.318943538Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:43.258163297Z", + "publish_time": "2025-12-23T09:33:43.25845381Z", + "first_worker_start": "2025-12-23T09:33:43.258971931Z", + "last_worker_end": "2025-12-23T09:33:43.317761Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:43.258998332Z", + "start_time": "2025-12-23T09:33:43.259070635Z", + "end_time": "2025-12-23T09:33:43.25918884Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:43.259314Z", + "start_time": "2025-12-23T09:33:43.259474Z", + "end_time": "2025-12-23T09:33:43.317761Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:43.258898328Z", + "start_time": "2025-12-23T09:33:43.258971931Z", + "end_time": "2025-12-23T09:33:43.259076736Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + 
"wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:43.258969231Z", + "start_time": "2025-12-23T09:33:43.259030634Z", + "end_time": "2025-12-23T09:33:43.259070435Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4966, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git a/data/output/00329fc58d09fa32ed332bb2a0adcd6398bcaec4.json b/data/output/00329fc58d09fa32ed332bb2a0adcd6398bcaec4.json new file mode 100644 index 0000000..8b9045c --- /dev/null +++ b/data/output/00329fc58d09fa32ed332bb2a0adcd6398bcaec4.json @@ -0,0 +1,298 @@ +{ + "file_name": "00329fc58d09fa32ed332bb2a0adcd6398bcaec4.txt", + "total_words": 585, + "top_n_words": [ + { + "word": "the", + "count": 31 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "and", + "count": 14 + }, + { + "word": "a", + "count": 13 + }, + 
{ + "word": "in", + "count": 13 + }, + { + "word": "zuckerberg", + "count": 12 + }, + { + "word": "s", + "count": 9 + }, + { + "word": "facebook", + "count": 8 + }, + { + "word": "is", + "count": 8 + }, + { + "word": "as", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Beth Stebner .", + "length": 14 + }, + { + "text": "23:20 EST, 24 March 2013 .", + "length": 26 + }, + { + "text": "03:02 EST, 25 March 2013 .", + "length": 26 + }, + { + "text": "In the past, Zuckerberg, who is worth $13.", + "length": 42 + }, + { + "text": "Christie at their home for the fundraiser .", + "length": 43 + }, + { + "text": "Green declined comment because the group is in early stages.", + "length": 60 + }, + { + "text": "‘Leaders are coming together on a broader agenda,’ they said.", + "length": 65 + }, + { + "text": "Host and hostess: Zuckerberg's wife, Priscilla Chan, helped entertain Gov.", + "length": 74 + }, + { + "text": "3billion as of this month, has not been vocal about his political believes.", + "length": 75 + }, + { + "text": "Facebook did not immediately respond to MailOnline’s request for comment.", + "length": 75 + }, + { + "text": "A source told Politico that the move isn’t necessarily driven by Facebook.", + "length": 76 + }, + { + "text": "Facebook billionaire Mark Zuckerberg is planning his next start-up – but this time, in politics.", + "length": 98 + }, + { + "text": "The Dobbs Ferry, New York native attended Harvard University, only to drop out when Facebook began taking off.", + "length": 110 + }, + { + "text": "According to the Chronicle, industry heavyweights included Google CEO Eric Schmidt as well as Yahoo CEO Marissa Mayer.", + "length": 118 + }, + { + "text": "The high numbers, as well as the high-profile fundraiser, have ignited 
rumors that Christie has his eye on the White House.", + "length": 123 + }, + { + "text": "In the wake of Hurricane Sandy, Christie’s popularity skyrocketed, with an all-time high approval rating of 74 percent among voters.", + "length": 134 + }, + { + "text": "Sources told the Chronicle that the independent expenditure group was initially formed to Zuckerberg’s former Harvard roommate, Joe Green.", + "length": 140 + }, + { + "text": "Raising funds: Zuckerberg and his wife, Priscilla Chan, held a fundraiser for Gov Christie at their $7million home in Palo Alto last month .", + "length": 140 + }, + { + "text": "The independent expenditure group is expected to focus mainly on immigration reform, and making it easier for international talent to work in the U.", + "length": 148 + }, + { + "text": "The move is unusual in that the CEO of Facebook has traditionally avoided large partisan contributions, instead donating to community-based charities.", + "length": 150 + }, + { + "text": "Radio silence: Until now, Zuckerberg has been fairly reserved in his political spending, only donating $5,000 twice to Facebook's political action committee .", + "length": 158 + }, + { + "text": "The political network: Mark Zuckerberg, pictured at Facebook headquarters earlier this month, is forming a SuperPAC focusing on immigration reform, sources said .", + "length": 162 + }, + { + "text": "The tech guru is exploring a SuperPAC to push the issues and has already poured in millions of his personal fortune, the San Francisco Chronicle originally reported.", + "length": 165 + }, + { + "text": "In 2011, Zuckerberg donated $100million to help improve public schools in Newark, New Jersey, sparking a close-knit relationship with the state’s governor, Chris Christie.", + "length": 173 + }, + { + "text": "Fast friends: New Jersey Governor Chris Christie, left, Facebook CEO Mark Zuckerberg, center, and Newark Mayor Cory Booker, right, spoke at an educational summit earlier this year .", + 
"length": 181 + }, + { + "text": "The 28-year-old wunderkind is entering the partisan arena with wide-reaching plans for education and immigration reform, among other things in a Silicon Valley SuperPAC, sources said.", + "length": 183 + }, + { + "text": "According to the Chronicle, Zuckerberg will have Republican consulants Jon Lerner and Rob Jesmer on board, as well as former press secretary Joe Lockhart, who worked under former president Bill Clinton.", + "length": 202 + }, + { + "text": "According to Politico, the group also seeks to support reform that would allow undocumented immigrants achieve a path to American citizenship, a move that they believe supports a robust economy and strengthens the nation.", + "length": 221 + }, + { + "text": "And last month, Zuckerberg and his wife, Priscilla Chan, hosted the Republican governor at their Palo Alto home for a fundraiser with many of Silicon Valley’s tech darlings, a move that stirred controversy among Democrats.", + "length": 224 + }, + { + "text": "Earlier this month, Zuckerberg and other notable Silicon Valley players met with members of Congress and the president to discuss a ‘more open and flexible’ immigration system to allow the best and the brightest in the tech industry to work on America’s shores.", + "length": 267 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4995706081390381 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:43.759254947Z", + "first_section_created": "2025-12-23T09:33:43.759596761Z", + "last_section_published": "2025-12-23T09:33:43.759912774Z", + "all_results_received": "2025-12-23T09:33:43.825270007Z", + "output_generated": "2025-12-23T09:33:43.825431513Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:43.759596761Z", + "publish_time": 
"2025-12-23T09:33:43.759912774Z", + "first_worker_start": "2025-12-23T09:33:43.760326292Z", + "last_worker_end": "2025-12-23T09:33:43.824383Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:43.760381894Z", + "start_time": "2025-12-23T09:33:43.760451697Z", + "end_time": "2025-12-23T09:33:43.7605324Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:43.760638Z", + "start_time": "2025-12-23T09:33:43.760777Z", + "end_time": "2025-12-23T09:33:43.824383Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:43.760308191Z", + "start_time": "2025-12-23T09:33:43.760369393Z", + "end_time": "2025-12-23T09:33:43.760445697Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:43.760245588Z", + "start_time": "2025-12-23T09:33:43.760326292Z", + "end_time": "2025-12-23T09:33:43.760376094Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3736, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/0032d07e91b8ce3d5f8ac313d799341df7556234.json b/data/output/0032d07e91b8ce3d5f8ac313d799341df7556234.json new file mode 100644 index 0000000..8ce9af2 --- /dev/null +++ b/data/output/0032d07e91b8ce3d5f8ac313d799341df7556234.json @@ -0,0 +1,326 @@ +{ + "file_name": "0032d07e91b8ce3d5f8ac313d799341df7556234.txt", + "total_words": 604, + "top_n_words": [ + { + "word": "a", + "count": 17 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "you", + "count": 17 + }, + { + "word": "the", + "count": 15 + }, + { + "word": "cheapoair", + "count": 12 + }, + { + "word": "for", + "count": 12 + }, + { + "word": "i", + "count": 12 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "t", + "count": 10 + }, + { + "word": "your", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "org.", + "length": 4 + }, + { + "text": "That was six months ago.", + "length": 24 + }, + { + "text": "Is she also out of options?", + "length": 27 + }, + { + "text": "E-mail him at celliott@ngs.", + "length": 27 + }, + { + "text": "So I had to buy new tickets.", + "length": 28 + }, + { + "text": "Is there anything you can do?", + "length": 29 + }, + { + "text": "I contacted Cheapoair on your behalf.", + "length": 37 + }, + { + "text": "A phone call doesn't work the same way.", + "length": 39 + }, + { + "text": "Her airline forces her to buy a new one.", + "length": 40 + }, + { + "text": "Mather asks her online agency, Cheapoair.", + "length": 41 + }, + { + "text": "-- Eileen Mather, Glenside, Pennsylvania .", + "length": 42 + }, + { + "text": "com 
from Philadelphia to Tapachula, Mexico.", + "length": 43 + }, + { + "text": "And, of course, there's the language issue.", + "length": 43 + }, + { + "text": "Two members of our group also had to pay again.", + "length": 47 + }, + { + "text": "After you hang up, your case is basically closed.", + "length": 49 + }, + { + "text": "Here's my story: I bought tickets online through Cheapoair.", + "length": 59 + }, + { + "text": "A: You shouldn't have to pay twice for your airline tickets.", + "length": 60 + }, + { + "text": "Cheapoair refunded the $879 you spent on your second ticket.", + "length": 60 + }, + { + "text": "But you also shouldn't be too quick to blame Cheapoair for the mix-up.", + "length": 70 + }, + { + "text": "com, for a refund, but more than six months later, she's still out $879.", + "length": 72 + }, + { + "text": "Reservations can be lost, paper tickets and boarding passes can be misread.", + "length": 75 + }, + { + "text": "But as your online travel agent, it was responsible for helping you fix it.", + "length": 75 + }, + { + "text": "Copyright 2009 CHRISTOPHER ELLIOTT, DISTRIBUTED BY TRIBUNE MEDIA SERVICES, INC.", + "length": 79 + }, + { + "text": "Christopher Elliott is the ombudsman for National Geographic Traveler magazine.", + "length": 79 + }, + { + "text": "After I returned home, I faxed all of my documents to a supervisor at Cheapoair.", + "length": 80 + }, + { + "text": "Cheapoair may -- or may not -- have been responsible for your non-working tickets.", + "length": 82 + }, + { + "text": "Q: I need your help getting my money back for a plane ticket I had to pay for twice.", + "length": 84 + }, + { + "text": "The online agency shouldn't have kept you in a holding pattern for more than six months.", + "length": 88 + }, + { + "text": "When you're traveling internationally, something can easily get lost in the translation.", + "length": 88 + }, + { + "text": "I've called her repeatedly and left messages, but no one has contacted me, and I'm out 
$879.", + "length": 92 + }, + { + "text": "A lot can go wrong when you're dealing with a flight schedule that involves multiple carriers.", + "length": 94 + }, + { + "text": "It apologized for the delay and said it contacted Aviasca, but couldn't determine why your ticket wasn't accepted.", + "length": 114 + }, + { + "text": "That's why you buy from an intermediary and pay a booking fee: so there's someone to turn to when something goes wrong.", + "length": 119 + }, + { + "text": "When we arrived in Mexico City, Aviacsa Airlines representatives told us that Cheapoair hadn't paid for the last leg of our trip.", + "length": 129 + }, + { + "text": "If you had arrived at the airport a half-hour earlier, you might have been able to speak with a supervisor and straightened this out.", + "length": 133 + }, + { + "text": "Once you were home, and were running into a brick wall with Cheapoair's supervisor, I would have tried knocking on the front door again.", + "length": 136 + }, + { + "text": "Normally, starting a new query through an online form means your complaint will get reviewed again and may be assigned a new case number.", + "length": 137 + }, + { + "text": "(Tribune Media Services) -- When Eileen Mather lands in Mexico City on her way to Tapachula, Mexico, she learns her airline ticket isn't valid.", + "length": 143 + }, + { + "text": "You could have avoided a lengthy dance with Cheapoair by taking this up with Aviacsa either when you were flying to Tapachula or returning home.", + "length": 144 + }, + { + "text": "An airline representative told Cheapoair it would have to speak to the agent who was working at the ticket counter when you checked in, which was impossible.", + "length": 157 + }, + { + "text": "\" I guess offering a toll-free number around the clock doesn't necessarily mean your questions will be answered quickly, but you can't blame me if I'm left with that impression.", + "length": 177 + }, + { + "text": "Cheapoair's \"Golden Guarantee\" promises 
\"to provide all our customers with 24/7 toll-free number support because we understand the importance of critical last-minute client/traveler needs and requirements for changes to trips.", + "length": 226 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.49754613637924194 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:44.260999223Z", + "first_section_created": "2025-12-23T09:33:44.261439042Z", + "last_section_published": "2025-12-23T09:33:44.261648651Z", + "all_results_received": "2025-12-23T09:33:44.332548215Z", + "output_generated": "2025-12-23T09:33:44.332705921Z", + "total_processing_time_ms": 71, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 70, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:44.261439042Z", + "publish_time": "2025-12-23T09:33:44.261648651Z", + "first_worker_start": "2025-12-23T09:33:44.262271577Z", + "last_worker_end": "2025-12-23T09:33:44.331156Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:44.262286477Z", + "start_time": "2025-12-23T09:33:44.262394782Z", + "end_time": "2025-12-23T09:33:44.262525787Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:44.262463Z", + "start_time": "2025-12-23T09:33:44.262627Z", + "end_time": "2025-12-23T09:33:44.331156Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 68 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:44.262181573Z", + "start_time": "2025-12-23T09:33:44.262271577Z", + "end_time": "2025-12-23T09:33:44.26235138Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:44.262192173Z", + "start_time": "2025-12-23T09:33:44.262294878Z", + 
"end_time": "2025-12-23T09:33:44.262325179Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 68, + "min_processing_ms": 68, + "max_processing_ms": 68, + "avg_processing_ms": 68, + "median_processing_ms": 68, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3409, + "slowest_section_id": 0, + "slowest_section_time_ms": 69 + } +} diff --git a/data/output/0032ff1c9c5651cf54cfa041a80c2ff4875fd68f.json b/data/output/0032ff1c9c5651cf54cfa041a80c2ff4875fd68f.json new file mode 100644 index 0000000..eb2d326 --- /dev/null +++ b/data/output/0032ff1c9c5651cf54cfa041a80c2ff4875fd68f.json @@ -0,0 +1,234 @@ +{ + "file_name": "0032ff1c9c5651cf54cfa041a80c2ff4875fd68f.txt", + "total_words": 287, + "top_n_words": [ + { + "word": "the", + "count": 19 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "everton", + "count": 7 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "besic", + "count": 6 + }, + { + "word": "in", + "count": 6 + }, + { + "word": "for", + "count": 5 + }, + { + "word": "on", + "count": 
5 + }, + { + "word": "s", + "count": 5 + }, + { + "word": "at", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "'I .", + "length": 4 + }, + { + "text": "' Once .", + "length": 8 + }, + { + "text": "permanent at the start of the month.", + "length": 36 + }, + { + "text": "they have approached the negotiations.", + "length": 38 + }, + { + "text": "the deal has been confirmed, Besic will become the second summer .", + "length": 66 + }, + { + "text": "acquisition for Martinez, joining Gareth Barry at Goodison Park after .", + "length": 71 + }, + { + "text": "back but first I would like to thank Ferencvaros for the way in which .", + "length": 71 + }, + { + "text": "am looking forward to introducing him to the Everton fans when we come .", + "length": 72 + }, + { + "text": "the England international made his successful loan move to the Toffees .", + "length": 72 + }, + { + "text": "'They have been very understanding at this point and we have enjoyed a good working relationship.", + "length": 97 + }, + { + "text": "'The next few days will allow Mo to meet the players,' the Spaniard told Everton's official website.", + "length": 100 + }, + { + "text": "On the move: Everton are closing in on the £4million signing of Bosnia international Muhamed Besic .", + "length": 101 + }, + { + "text": "International: Besic, who signs from Hungarian side Ferencvaros, appeared at this summer's World Cup .", + "length": 102 + }, + { + "text": "'But more importantly it is an opportunity for him to get into optimum condition ahead of the new season.", + "length": 105 + }, + { + "text": "Mastermind: Roberto Martinez is looking for ways to improve on Everton's fifth place finish last season .", + "length": 105 + }, + { + "text": "Arrival: Gareth Barry became Everton's first summer signing when he moved from Manchester City on a free transfer .", + "length": 115 + }, + { + "text": "Although Besic will not feature in a friendly against Leicester in Bangkok, Martinez is pleased to 
have the former Hamburg man in amongst his current squad.", + "length": 156 + }, + { + "text": "Everton appear to be on the brink of announcing their second summer signing as Ferencvaros midfielder Muhamed Besic has joined the club's pre-season trip to Thailand.", + "length": 166 + }, + { + "text": "Besic, 21, represented Bosnia-Herzegovina in the World Cup and has been a target for Everton boss Roberto Martinez as he looks to add to a squad which he guided to a fifth place Barclays Premier League finish last season.", + "length": 221 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.40891286730766296 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:44.762441687Z", + "first_section_created": "2025-12-23T09:33:44.762804103Z", + "last_section_published": "2025-12-23T09:33:44.76299101Z", + "all_results_received": "2025-12-23T09:33:44.826915883Z", + "output_generated": "2025-12-23T09:33:44.827038488Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:44.762804103Z", + "publish_time": "2025-12-23T09:33:44.76299101Z", + "first_worker_start": "2025-12-23T09:33:44.763526533Z", + "last_worker_end": "2025-12-23T09:33:44.824559Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:44.763498332Z", + "start_time": "2025-12-23T09:33:44.763558734Z", + "end_time": "2025-12-23T09:33:44.763595036Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:44.763728Z", + "start_time": "2025-12-23T09:33:44.763897Z", + "end_time": "2025-12-23T09:33:44.824559Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": 
"2025-12-23T09:33:44.763508832Z", + "start_time": "2025-12-23T09:33:44.763576335Z", + "end_time": "2025-12-23T09:33:44.763629837Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:44.76346023Z", + "start_time": "2025-12-23T09:33:44.763526533Z", + "end_time": "2025-12-23T09:33:44.763560634Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1723, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/00330ad079a460a329fa0977f2db3810044f23af.json b/data/output/00330ad079a460a329fa0977f2db3810044f23af.json new file mode 100644 index 0000000..227ce12 --- /dev/null +++ b/data/output/00330ad079a460a329fa0977f2db3810044f23af.json @@ -0,0 +1,480 @@ +{ + "file_name": "00330ad079a460a329fa0977f2db3810044f23af.txt", + 
"total_words": 1166, + "top_n_words": [ + { + "word": "the", + "count": 83 + }, + { + "word": "in", + "count": 43 + }, + { + "word": "shanghai", + "count": 28 + }, + { + "word": "and", + "count": 27 + }, + { + "word": "to", + "count": 27 + }, + { + "word": "of", + "count": 25 + }, + { + "word": "a", + "count": 24 + }, + { + "word": "is", + "count": 23 + }, + { + "word": "s", + "count": 21 + }, + { + "word": "as", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "The $5.", + "length": 7 + }, + { + "text": "The Bund .", + "length": 10 + }, + { + "text": "Shikumen .", + "length": 10 + }, + { + "text": "Skyscrapers .", + "length": 13 + }, + { + "text": "Pidgin English .", + "length": 16 + }, + { + "text": "International food .", + "length": 20 + }, + { + "text": "\" Cement is \"si men ting.", + "length": 25 + }, + { + "text": "Shanghai is an unusual place.", + "length": 29 + }, + { + "text": "accompany deep-fried pork chops.", + "length": 32 + }, + { + "text": "A spring lock is called \"si ba lin.", + "length": 35 + }, + { + "text": "Tianzifang is a more Bohemian area.", + "length": 35 + }, + { + "text": "Shanghai is also building a Disneyland.", + "length": 39 + }, + { + "text": "International events and entertainment .", + "length": 40 + }, + { + "text": "Next in line is the 632-meter Shanghai Tower.", + "length": 45 + }, + { + "text": "But you can still find them in a few corners.", + "length": 45 + }, + { + "text": "Its personality remains just as strong today.", + "length": 45 + }, + { + "text": "It's the only Formula One stop in mainland China.", + "length": 49 + }, + { + "text": "Shikumen is Shanghai's indigenous alleyway housing.", + "length": 51 + }, + { + "text": "\" In Shanghainese, \"on sale\" can refer to a \"cheap\" person.", + "length": 59 + }, + { + "text": "Local mothers usually cook it to treat friends and families.", + "length": 60 + }, + { + "text": "Colonial history has trickled down to Shanghai's local lingo.", + "length": 61 + }, + { 
+ "text": "The week-long event is part of the ATP World Tour Masters 1000.", + "length": 63 + }, + { + "text": "Nowadays, the yellow-labeled bottle is ubiquitous in supermarkets.", + "length": 66 + }, + { + "text": "View it from afar and you'd think you were sailing into Liverpool.", + "length": 66 + }, + { + "text": "Since 1994, each of them has had a turn as the tallest structure in China.", + "length": 74 + }, + { + "text": "The Bund refers to Shanghai's waterfront on the west bank of Huangpu River.", + "length": 75 + }, + { + "text": "The most magnificent building is today's number 10-12, the former HSBC building.", + "length": 80 + }, + { + "text": "The Chinese Formula One Grand Prix is one of the biggest annual events in Shanghai.", + "length": 83 + }, + { + "text": "The hearty beef and vegetable soup has evolved into Shanghai's favorite comfort food.", + "length": 85 + }, + { + "text": "It's now in the lobby of Shanghai Pudong Development Bank, the building's current occupier.", + "length": 91 + }, + { + "text": "When international events set up in China, Shanghai is as often as not the default host city.", + "length": 93 + }, + { + "text": "The city's British rulers and Russian refugees may be long gone, but their food has remained.", + "length": 93 + }, + { + "text": "In Xintiandi, high-end restaurants, pubs and clubs have taken over the revamped old buildings.", + "length": 94 + }, + { + "text": "When Russians fleeing the October Revolution of 1917 came to Shanghai, they brought their borscht.", + "length": 98 + }, + { + "text": "The 1,500-meter-long strip is a legacy passed down by one of the city's former rulers, Great Britain.", + "length": 101 + }, + { + "text": "5-billion investment will further promote Mickey Mouse and Donald Duck in the realm of the Monkey King.", + "length": 103 + }, + { + "text": "Since the 1990s, shikumen buildings have been getting pulled down more quickly than the skyscrapers rise.", + "length": 105 + }, + { + "text": "Now 
based in London, Tracy You is a native and longtime resident of Shanghai and a former CNN travel producer.", + "length": 110 + }, + { + "text": "The Shanghai Masters (October 4-12) tennis championship is attended by the highest-ranking players of the year.", + "length": 111 + }, + { + "text": "(CNN) -- Even during the harshest periods of the communist era, being Shanghainese had a special cachet in China.", + "length": 113 + }, + { + "text": "Here are the things that make China's booming commercial hub a unique place in the world's most populous country.", + "length": 113 + }, + { + "text": "Worcestershire sauce, that classic condiment from the United Kingdom, has a brother some 9,000 kilometers from home.", + "length": 116 + }, + { + "text": "Large-scale events in Beijing tend to carry political messages, while those in Shanghai focus more on fun and glamor.", + "length": 117 + }, + { + "text": "Shanghai dialect is filed with localized English words and pidgin English cultural identifiers unique to Shanghainese.", + "length": 118 + }, + { + "text": "Series of stone buildings were built in the 1870s as a way to accommodate the city's rapidly growing immigrant families.", + "length": 120 + }, + { + "text": "High-quality international restaurants are springing up in Shanghai so quickly that it's hard to keep track of them all.", + "length": 120 + }, + { + "text": "Although towering blocks mushroom throughout the metropolis, the skyscraper center point is the Lujiazui Financial District.", + "length": 124 + }, + { + "text": "Scheduled to open in 2015, the 121-story building is set to host what it claims will be world's tallest luxury hotel, J hotel.", + "length": 126 + }, + { + "text": "Known locally as \"spicy soy sauce,\" Shanghainese Worcestershire sauce was first produced in 1930 to cater to the large expat market.", + "length": 132 + }, + { + "text": "It was largely built in the late-19th and early-20th centuries to establish Shanghai as the British Empire's 
trading hub of the Far East.", + "length": 137 + }, + { + "text": "It's Chinese, but not entirely; its hybrid of Eastern and Western business and social traditions is found nowhere else in mainland China.", + "length": 137 + }, + { + "text": "According to Emporis, a global real estate data provider based in Germany, Shanghai ranks sixth among world cities with the most skyscrapers.", + "length": 141 + }, + { + "text": "When completed in 1923, the seven-story neoclassical landmark was dubbed \"the most luxurious building from the Suez Canal to the Bering Strait.", + "length": 143 + }, + { + "text": "Two dozen colossal Western structures, ranging in style from art deco to Victorian Gothic, stand side by side, forming a massive marble curtain.", + "length": 144 + }, + { + "text": "The city and its residents were a synonym for Western fashion and open-minded attitudes, as different as could be from their Mao-pin wearing comrades.", + "length": 150 + }, + { + "text": "There are 241 skyscrapers in Shanghai, eight fewer than Dubai, 14 more than in Seoul and 103 more than the second mainland city on the list, Guangzhou.", + "length": 151 + }, + { + "text": "New kitchens often set up on the Bund, around Xintiandi and in the former French Concession, as rivals to the established fine-dining scene in Beijing.", + "length": 151 + }, + { + "text": "Shanghai dialect is incomprehensible to a typical Mandarin speaker, whose language is largely based on pronunciation and vocabulary from northern China.", + "length": 152 + }, + { + "text": "With the financial center's move to the east bank of Huangpu River, the old Bund has become a new home for world-class hotels, restaurants and retailers.", + "length": 153 + }, + { + "text": "As modern high-rises in Shanghai have grabbed international attention, however, these local architectural treasures have been ignored or even shoved aside.", + "length": 155 + }, + { + "text": "In its labyrinth of alleyways, indie designers hang up cocktail 
dresses next to self-employed vendors selling replica communist souvenirs in the courtyard.", + "length": 155 + }, + { + "text": "\" The building's original ceiling mural managed to survive the Cultural Revolution; the octagonal mosaic painting is one of the best-kept secrets in Shanghai.", + "length": 158 + }, + { + "text": "But look beyond the glitzy restaurant menus and into the homes of private residents, and you'll see a tradition of international cuisine found nowhere else in China.", + "length": 165 + }, + { + "text": "Set to open toward the end of 2015, Shanghai Disneyland will be the first Disney theme park in mainland China (Hong Kong Disneyland opened in 2005) and the sixth in the world.", + "length": 175 + }, + { + "text": "The 258-room hotel -- a joint venture between Shanghai Jinjiang Hotel Group and Interstate Hotels and Resorts -- will occupy the 84th through 110th floors of the Shanghai Tower.", + "length": 177 + }, + { + "text": "Lujiazui is home to the most recognized high-rises in China, including the 468-meter Oriental Pearl TV Tower, the 421-meter Jinmao Tower and the 492-meter Shanghai World Financial Center.", + "length": 187 + }, + { + "text": "On the opposite side of the traditional Bund, Lujiazui appears so futuristic that it's become a Hollywood favorite as a setting for films, most recently featured in \"Her\" with Joaquin Phoenix.", + "length": 192 + }, + { + "text": "For more authentic shikumen neighborhoods, Cité Bourgogne on Shaanxi Nan Lu, and Jing'an Villa on Nanjing Xi Lu, are throwbacks to a uniquely Shanghai experience that's rapidly disappearing.", + "length": 193 + }, + { + "text": "A number of pockets in China have impressive Western buildings -- the German Quarter in Qingdao, Russian buildings in Harbin -- but none provide the surreal feeling of \"elsewhereness\" like the Bund.", + "length": 198 + }, + { + "text": "When the Communist Party took over in 1949, shikumen architecture was at its height -- there were around 
200,000 shikumen buildings throughout central Shanghai, each divided into tenements to house five or even ten families.", + "length": 224 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.42530517280101776 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:45.263762046Z", + "first_section_created": "2025-12-23T09:33:45.265700427Z", + "last_section_published": "2025-12-23T09:33:45.266148346Z", + "all_results_received": "2025-12-23T09:33:45.359030929Z", + "output_generated": "2025-12-23T09:33:45.359312841Z", + "total_processing_time_ms": 95, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 92, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:45.265700427Z", + "publish_time": "2025-12-23T09:33:45.265990739Z", + "first_worker_start": "2025-12-23T09:33:45.266541663Z", + "last_worker_end": "2025-12-23T09:33:45.334902Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:45.266554463Z", + "start_time": "2025-12-23T09:33:45.266610465Z", + "end_time": "2025-12-23T09:33:45.266698869Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:45.266709Z", + "start_time": "2025-12-23T09:33:45.266858Z", + "end_time": "2025-12-23T09:33:45.334902Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 68 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:45.266611465Z", + "start_time": "2025-12-23T09:33:45.266691969Z", + "end_time": "2025-12-23T09:33:45.266815974Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:45.266453559Z", + "start_time": "2025-12-23T09:33:45.266541663Z", + "end_time": "2025-12-23T09:33:45.266585864Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:45.266035941Z", + "publish_time": "2025-12-23T09:33:45.266148346Z", + "first_worker_start": "2025-12-23T09:33:45.266533162Z", + "last_worker_end": "2025-12-23T09:33:45.358066Z", + "total_journey_time_ms": 92, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:45.266545863Z", + "start_time": "2025-12-23T09:33:45.266598765Z", + "end_time": "2025-12-23T09:33:45.266634366Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:45.266845Z", + "start_time": "2025-12-23T09:33:45.266987Z", + "end_time": "2025-12-23T09:33:45.358066Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 91 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:45.266555763Z", + "start_time": "2025-12-23T09:33:45.266615066Z", + "end_time": "2025-12-23T09:33:45.266669868Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:45.266461159Z", + "start_time": "2025-12-23T09:33:45.266533162Z", + "end_time": "2025-12-23T09:33:45.266558263Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 159, + "min_processing_ms": 68, + "max_processing_ms": 91, + "avg_processing_ms": 79, + "median_processing_ms": 91, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3525, + "slowest_section_id": 1, + "slowest_section_time_ms": 92 + } +} diff --git a/data/output/00334c789267d080774be56096f384957b0a0bfc.json b/data/output/00334c789267d080774be56096f384957b0a0bfc.json new file mode 100644 index 0000000..e25db9b --- /dev/null +++ b/data/output/00334c789267d080774be56096f384957b0a0bfc.json @@ -0,0 +1,242 @@ +{ + "file_name": "00334c789267d080774be56096f384957b0a0bfc.txt", + "total_words": 397, + "top_n_words": [ + { + "word": "the", + "count": 21 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "rodriguez", + "count": 8 + }, + { + "word": "was", + "count": 8 + }, + { + "word": "it", + "count": 7 + }, + { + "word": "of", + "count": 7 + }, + { + "word": "at", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "in ENGLISH!", + "length": 11 + }, + { + "text": "'Football moves on though so it's one of those things.", + "length": 54 + }, + { + "text": "'It was hard at the start and that helped me through it.", + "length": 56 + }, + { + "text": "Gone: Rickie Lambert has already left the club, moving to Liverpool .", + "length": 69 + }, + { + "text": "VIDEO: Scroll down to see Pochettino give his first Spurs interview...", + "length": 70 + }, + { + "text": "'It was massive, a really big help to see the support I got,' Rodriguez added.", + "length": 78 + }, + { + "text": "'You don't want to see players going and the team being taken apart,' 
said Rodriguez.", + "length": 85 + }, + { + "text": "The 24-year-old says his disappointment was eased by support shown to him by the fans.", + "length": 86 + }, + { + "text": "Moving on: Jay Rodriguez is disappointed to be losing 'great manager' Mauricio Pochettino .", + "length": 91 + }, + { + "text": "'He worked us really hard and we got the good results and finished eighth which we were happy with.", + "length": 99 + }, + { + "text": "Southampton forward Jay Rodriguez admits the club have lost a 'great manager' in Mauricio Pochettino.", + "length": 101 + }, + { + "text": "Finisher: Jay Rodriguez became one of the finest young strikers in the Premier League under the Argentine .", + "length": 107 + }, + { + "text": "'How the Man City fans reacted on the day was great for me and obviously everyone at Southampton has been brilliant.", + "length": 116 + }, + { + "text": "'Mauricio is a great manager and a great guy and was well-respected by the lads,' Rodriguez told Press Association Sport.", + "length": 121 + }, + { + "text": "Pochettino left St Mary's to take the vacant post at Tottenham, where he was handed a five-year-contract at the end of May.", + "length": 123 + }, + { + "text": "' Outbound: Adam Lallana and Luke Shaw are just two of Southampton's stars who could be moving to a bigger club this summer .", + "length": 125 + }, + { + "text": "'You can sometimes never stop it, it is one of those things so I'm trying not to think about it and hopefully we will have everyone at pre-season.", + "length": 146 + }, + { + "text": "Striker Rickie Lambert followed Pochettino out the exit door to join Liverpool earlier this month and there are fears more players could do the same.", + "length": 149 + }, + { + "text": "' Southampton face a battle to hang on to their key players this summer with the likes of Adam Lallana, Luke Shaw and Calum Chambers all being linked with moves away.", + "length": 166 + }, + { + "text": "The Argentine guided Southampton to a club-record 
eighth-placed finish in the Barclays Premier League last season and Rodriguez was full of praise for his former boss.", + "length": 167 + }, + { + "text": "Rodriguez, who scored 17 goals in all competitions last season, was in contention for a place in England's World Cup squad before he suffered an anterior cruciate ligament injury against Manchester City in April.", + "length": 212 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.5218866467475891 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:45.766969684Z", + "first_section_created": "2025-12-23T09:33:45.769001069Z", + "last_section_published": "2025-12-23T09:33:45.769198277Z", + "all_results_received": "2025-12-23T09:33:45.832916241Z", + "output_generated": "2025-12-23T09:33:45.83313695Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:45.769001069Z", + "publish_time": "2025-12-23T09:33:45.769198277Z", + "first_worker_start": "2025-12-23T09:33:45.769774901Z", + "last_worker_end": "2025-12-23T09:33:45.831876Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:45.769765801Z", + "start_time": "2025-12-23T09:33:45.769863905Z", + "end_time": "2025-12-23T09:33:45.769909207Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:45.769899Z", + "start_time": "2025-12-23T09:33:45.770034Z", + "end_time": "2025-12-23T09:33:45.831876Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:45.7697427Z", + "start_time": "2025-12-23T09:33:45.769813103Z", + "end_time": "2025-12-23T09:33:45.769882706Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:45.769709099Z", + "start_time": "2025-12-23T09:33:45.769774901Z", + "end_time": "2025-12-23T09:33:45.769817503Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2251, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/0033829349643e2964a8c3ce5fd997116ce43f7c.json b/data/output/0033829349643e2964a8c3ce5fd997116ce43f7c.json new file mode 100644 index 0000000..b29c80a --- /dev/null +++ b/data/output/0033829349643e2964a8c3ce5fd997116ce43f7c.json @@ -0,0 +1,254 @@ +{ + "file_name": "0033829349643e2964a8c3ce5fd997116ce43f7c.txt", + "total_words": 465, + "top_n_words": [ + { + "word": "the", + "count": 31 + }, + { + "word": "of", + "count": 13 + }, + { + "word": "to", + "count": 13 + }, + { + 
"word": "in", + "count": 12 + }, + { + "word": "mdc", + "count": 11 + }, + { + "word": "parliament", + "count": 8 + }, + { + "word": "s", + "count": 8 + }, + { + "word": "party", + "count": 7 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "mugabe", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "I promise to be professional.", + "length": 29 + }, + { + "text": "An independent candidate won one seat.", + "length": 38 + }, + { + "text": "President Robert Mugabe's ZANU-PF got 99 seats.", + "length": 47 + }, + { + "text": "\"It will ensure that progressive laws are passed.", + "length": 49 + }, + { + "text": "Still, Tsvangirai said he would attend the swearing-in ceremony.", + "length": 64 + }, + { + "text": "An offshoot of the main MDC party, led by Arthur Mutambara, won 10 seats.", + "length": 73 + }, + { + "text": "The speaker of the parliament is the fourth most powerful post in Zimbabwe.", + "length": 75 + }, + { + "text": "\" CNN's Nkepile Mabuse in Johannesburg, South Africa contributed to this report.", + "length": 80 + }, + { + "text": "Morgan Tsvangirai's MDC faction has a slim majority following parliamentary elections.", + "length": 86 + }, + { + "text": "The vote took place hours after Mugabe swore in lawmakers, five months after they were elected.", + "length": 95 + }, + { + "text": "\"This is historic as it ceases to be a rubber-stamping house,\" Moyo said after winning the position.", + "length": 100 + }, + { + "text": "Final results gave 100 seats to an MDC faction led by Morgan Tsvangirai, the party's presidential candidate.", + "length": 108 + }, + { + "text": "Government spokesman Bright Matonga said Jembere had been accused of rape, but that he has been released from custody.", + "length": 118 + }, + { + "text": "A third member -- Elton Mangoma -- escaped an arrest attempt when other party members came to his rescue, MDC officials said.", + "length": 125 + }, + { + "text": "Attendance at the session of parliament is important 
since the membership is closely divided between the MDC and the Mugabe's ZANU-PF.", + "length": 134 + }, + { + "text": "Themba-Nyathi represented the splinter MDC faction led by Arthur Mutambara, but he had support of President Robert Mugabe's ZANU-PF party.", + "length": 138 + }, + { + "text": "\" One of those detained -- Shuwa Mudiwa -- appeared back in parliament, but the other member -- Elia Jembere -- was not seen, according to sources.", + "length": 147 + }, + { + "text": "MDC party spokesman Nelson Chamisa said all MDC members elected to parliament were expected to attend \"except those few MPs who are still in hiding.", + "length": 148 + }, + { + "text": "\" Moyo -- the national chairman of the main Movement for Democratic Change (MDC) party -- received 110 votes while his only opponent, Paul Themba-Nyathi, received 98 votes.", + "length": 172 + }, + { + "text": "An MDC official said the arrests were part of the \"sinister agenda of this regime\" to \"tilt the balance of numbers in their favour during the voting for the speaker of parliament.", + "length": 179 + }, + { + "text": "The ruling ZANU-PF party lost its majority in the 210-seat parliament in elections in March, but vote recounts and political violence have delayed the body from convening until now.", + "length": 181 + }, + { + "text": "HARARE, Zimbabwe (CNN) -- Zimbabwean lawmakers on Monday narrowly voted for Lovemore Moyo as speaker of the parliament -- making him the first opposition lawmaker to hold the position in the country's history.", + "length": 209 + }, + { + "text": "Two members of the main MDC -- led by presidential candidate Morgan Tsvangirai -- were arrested as they arrived at the opening session, but they were released after a short time, according to a government spokesman.", + "length": 215 + }, + { + "text": "Tsvangirai, who was locked in a bitter presidential contest with Mugabe, had objected to Mugabe's decision to convene parliament, saying it could \"decapitate\" 
power-sharing talks that have been on hold for the past two weeks.", + "length": 225 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.3790893852710724 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:46.269399789Z", + "first_section_created": "2025-12-23T09:33:46.269729103Z", + "last_section_published": "2025-12-23T09:33:46.269953913Z", + "all_results_received": "2025-12-23T09:33:46.3342395Z", + "output_generated": "2025-12-23T09:33:46.334390007Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:46.269729103Z", + "publish_time": "2025-12-23T09:33:46.269953913Z", + "first_worker_start": "2025-12-23T09:33:46.270506836Z", + "last_worker_end": "2025-12-23T09:33:46.333329Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:46.270435533Z", + "start_time": "2025-12-23T09:33:46.270506836Z", + "end_time": "2025-12-23T09:33:46.270552738Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:46.270711Z", + "start_time": "2025-12-23T09:33:46.270857Z", + "end_time": "2025-12-23T09:33:46.333329Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:46.270506136Z", + "start_time": "2025-12-23T09:33:46.270577539Z", + "end_time": "2025-12-23T09:33:46.270663042Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:46.270459034Z", + "start_time": "2025-12-23T09:33:46.270529037Z", + "end_time": "2025-12-23T09:33:46.270555438Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + 
"worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2855, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/003391069e04a2281b900b620063978f5c532467.json b/data/output/003391069e04a2281b900b620063978f5c532467.json new file mode 100644 index 0000000..d6d9339 --- /dev/null +++ b/data/output/003391069e04a2281b900b620063978f5c532467.json @@ -0,0 +1,396 @@ +{ + "file_name": "003391069e04a2281b900b620063978f5c532467.txt", + "total_words": 1009, + "top_n_words": [ + { + "word": "the", + "count": 76 + }, + { + "word": "in", + "count": 37 + }, + { + "word": "to", + "count": 27 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "of", + "count": 23 + }, + { + "word": "a", + "count": 20 + }, + { + "word": "on", + "count": 16 + }, + { + "word": "at", + "count": 14 + }, + { + "word": "norman", + "count": 12 + }, + { + "word": "open", + "count": 12 + } + ], + "sorted_sentences": [ 
+ { + "text": "By .", + "length": 4 + }, + { + "text": "play-off.", + "length": 9 + }, + { + "text": "Derek Lawrenson .", + "length": 17 + }, + { + "text": "Now, he says he has had enough.", + "length": 31 + }, + { + "text": "Meryem Cup in Morocco this month .", + "length": 34 + }, + { + "text": "Putt your money on Bubba \u0026 Dustin .", + "length": 35 + }, + { + "text": "Hull shot 62 in the final round of .", + "length": 36 + }, + { + "text": "A lot of good things are going to follow.", + "length": 41 + }, + { + "text": "‘It feels good to win again after seven years.", + "length": 48 + }, + { + "text": "Early tips for the Masters, now just three weeks away?", + "length": 54 + }, + { + "text": "The big losers in all of this, of course, are the fans.", + "length": 55 + }, + { + "text": "How many players win professional events at the age of 17?", + "length": 58 + }, + { + "text": "But how about their ability to cope on those lightning-paced greens?", + "length": 68 + }, + { + "text": "Wrangle: Greg Norman will not play at The Open Championship this year .", + "length": 71 + }, + { + "text": "the Lalla Meryem Cup in Morocco, to catch the experienced French player .", + "length": 73 + }, + { + "text": "In line: Norman is one of the game's greats but will not be at the Open .", + "length": 73 + }, + { + "text": "Gwladys Nocera, and then birdied the first extra hole to beat her in the .", + "length": 74 + }, + { + "text": "Picture perfect: Norman tees off at the ninth hole in Ayrshire, Scotland .", + "length": 74 + }, + { + "text": "’ What on earth would cause the Shark to bite such lumps out of the two bodies?", + "length": 81 + }, + { + "text": "Again, Rolex were heavily involved and, again, Norman believes he was denied entry.", + "length": 83 + }, + { + "text": "How ironic this act of perfect timing looks like being sabotaged by a watch dispute.", + "length": 84 + }, + { + "text": "Sealed with a kiss: Matteo Manassero won the Castellon Masters Costa Azahar in 2010 
.", + "length": 85 + }, + { + "text": "Record breaker: New Zealander Lydia Ko with the New Zealand Women's Golf Open trophy .", + "length": 86 + }, + { + "text": "A grand farewell at St Andrews, the last Open for which he will be eligible, appeared ideal.", + "length": 92 + }, + { + "text": "Job done: Bubba Watson won the tournament back in 2012 and will be gunning for another one .", + "length": 92 + }, + { + "text": "Icon: Norman and his wife Lorna back in 1993 as the Australian claims his second Claret Jug .", + "length": 93 + }, + { + "text": "In waiting: Dustin Johnson could be in line to pick up the green jacket in three weeks' time .", + "length": 94 + }, + { + "text": "You’ll know all about their length on an Augusta course offering the big bombers a huge advantage.", + "length": 100 + }, + { + "text": "It all concerns the intense rivalry between watch companies Rolex and Omega, both big players in golf.", + "length": 102 + }, + { + "text": "Matteo Manassero is the only one to have done so on the European Tour and did it twice for good measure.", + "length": 104 + }, + { + "text": "Nervous wait: Norman and his caddy take a break during the final round of the 1986 tournament in Turnberry .", + "length": 108 + }, + { + "text": "You could do worse than have a sneaky pound or two on former winner Bubba Watson and major champion-in-waiting, Dustin Johnson.", + "length": 127 + }, + { + "text": "I’ve nothing against the R\u0026A and the European Tour but, quite frankly, the way they go about their business is not the way I do it.", + "length": 133 + }, + { + "text": "With an historic Olympics not far away and so much more, what wonders the photogenic Hull is going to do for women’s golf in the UK.", + "length": 134 + }, + { + "text": "This, therefore, is the company of prodigies English starlet Charley Hull is keeping with her maiden success on the Ladies European Tour on Sunday.", + "length": 147 + }, + { + "text": "A month later, Norman was at  Gleneagles and 
planned to be present to hear his friend, Ryder Cup captain Jose-Maria Olazabal’s wildcards announcement.", + "length": 153 + }, + { + "text": "Ryo Ishikawa achieved similar things in Japan and then there’s the extraordinary New Zealander Lydia Ko, who broke all records in winning at the age of only 15.", + "length": 162 + }, + { + "text": "Let’s hope some wise heads at the R\u0026A and the European Tour can resolve it, for Norman’s inestimable contribution to recent Open lore deserves a fitting finale.", + "length": 164 + }, + { + "text": "She then became the youngest participant to compete in the Solheim Cup last August, where she proved one of the stars of Europe’s historic first success on American soil.", + "length": 172 + }, + { + "text": "The Woburn starlet will celebrate her 18th on Thursday, having rounded off a memorable year that saw her turn pro and promptly finish runner-up in each of her first five starts.", + "length": 177 + }, + { + "text": "Spot the difference: Norman kisses the trophy in 1986 (left) and in 1993 (right) ‘It has left a pretty sour taste in my mouth, a bad feeling after what I have done in the game.", + "length": 178 + }, + { + "text": "One of the great sights in recent Open history came at Royal Birkdale in 2008 when thousands of people followed the halfway leader Norman on his improbable quest to win a third Claret Jug at the age of 53.", + "length": 205 + }, + { + "text": "But, in news that will come as a bitter disappointment to his vast army of admirers, I can reveal it won’t be happening for the two-time Claret Jug winner, owing to an ugly dispute that reflects badly on the game.", + "length": 215 + }, + { + "text": "Well, the two men who currently stand miles clear of the rest on the PGA Tour this year when it comes to the longest streaks without a three-putt, at 237 and 217 holes respectively, just happen to be Watson and Johnson.", + "length": 219 + }, + { + "text": "Instead of meeting up with friends in Scotland and Ireland 
and playing in the Open at Royal Liverpool and the Senior Open at Royal Porthcawl in July, he will retire with his wife Kirsten to his holiday ranch in Colorado.", + "length": 220 + }, + { + "text": "The Open Championship at St Andrews next year appeared the perfect place for Greg Norman to wave goodbye, stopping — like the legends before him — on the Swilcan Burn for the obligatory  farewell photo of a lifetime.", + "length": 221 + }, + { + "text": "’ Australian John Senden had to end a victory drought going back to 2006  to make it to the Masters and that’s exactly what he did on Sunday, holding off the likes of Luke Donald to win the latest event on the Florida swing.", + "length": 229 + }, + { + "text": "The 59-year-old Aussie great, never one to pull his punches, said: ‘If you want the truth, I’ve played in my last Open and it’s all down to the way the Royal and Ancient and the European Tour have handled my whole situation.", + "length": 230 + }, + { + "text": "Such is their competitiveness that Norman, who represents Omega, reckons Rolex kept him out of playing in the pro-am for the Senior British Open — an event they sponsor — in 2012 over a Turnberry course where he had walked away from the field in the Open in 1986 (Rolex deny this).", + "length": 285 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.6744312047958374 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:46.770777898Z", + "first_section_created": "2025-12-23T09:33:46.771194713Z", + "last_section_published": "2025-12-23T09:33:46.771616829Z", + "all_results_received": "2025-12-23T09:33:46.869174699Z", + "output_generated": "2025-12-23T09:33:46.869406007Z", + "total_processing_time_ms": 98, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 97, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:46.771194713Z", + "publish_time": "2025-12-23T09:33:46.771487824Z", + 
"first_worker_start": "2025-12-23T09:33:46.772084247Z", + "last_worker_end": "2025-12-23T09:33:46.85948Z", + "total_journey_time_ms": 88, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:46.77243756Z", + "start_time": "2025-12-23T09:33:46.772529863Z", + "end_time": "2025-12-23T09:33:46.772648268Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:46.772626Z", + "start_time": "2025-12-23T09:33:46.772801Z", + "end_time": "2025-12-23T09:33:46.85948Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 86 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:46.772012044Z", + "start_time": "2025-12-23T09:33:46.772084247Z", + "end_time": "2025-12-23T09:33:46.772195251Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:46.772012244Z", + "start_time": "2025-12-23T09:33:46.772086247Z", + "end_time": "2025-12-23T09:33:46.772140249Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:46.771534126Z", + "publish_time": "2025-12-23T09:33:46.771616829Z", + "first_worker_start": "2025-12-23T09:33:46.772377858Z", + "last_worker_end": "2025-12-23T09:33:46.868267Z", + "total_journey_time_ms": 96, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:46.772482562Z", + "start_time": "2025-12-23T09:33:46.772518463Z", + "end_time": "2025-12-23T09:33:46.772531964Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:46.772673Z", + "start_time": "2025-12-23T09:33:46.772824Z", + "end_time": "2025-12-23T09:33:46.868267Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 95 + }, + "topn": { + 
"worker_type": "topn", + "receive_time": "2025-12-23T09:33:46.772339856Z", + "start_time": "2025-12-23T09:33:46.772377858Z", + "end_time": "2025-12-23T09:33:46.772397959Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:46.772359657Z", + "start_time": "2025-12-23T09:33:46.772393158Z", + "end_time": "2025-12-23T09:33:46.772399659Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 181, + "min_processing_ms": 86, + "max_processing_ms": 95, + "avg_processing_ms": 90, + "median_processing_ms": 95, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2758, + "slowest_section_id": 1, + "slowest_section_time_ms": 96 + } +} diff --git a/data/output/0034446c452924146580adfa1f176fd24224ea92.json b/data/output/0034446c452924146580adfa1f176fd24224ea92.json new file mode 100644 index 0000000..31c9c7e --- /dev/null +++ b/data/output/0034446c452924146580adfa1f176fd24224ea92.json @@ -0,0 +1,356 @@ +{ + "file_name": 
"0034446c452924146580adfa1f176fd24224ea92.txt", + "total_words": 833, + "top_n_words": [ + { + "word": "the", + "count": 52 + }, + { + "word": "of", + "count": 24 + }, + { + "word": "bbc", + "count": 22 + }, + { + "word": "and", + "count": 21 + }, + { + "word": "to", + "count": 19 + }, + { + "word": "it", + "count": 16 + }, + { + "word": "was", + "count": 16 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "said", + "count": 14 + }, + { + "word": "that", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "Unfortunately.", + "length": 14 + }, + { + "text": "They weren’t bureaucrats.", + "length": 27 + }, + { + "text": "It shows in the broadcasting.", + "length": 29 + }, + { + "text": "We failed to look at what our job was.", + "length": 38 + }, + { + "text": "And can we be critical of multiculturalism?", + "length": 43 + }, + { + "text": "Their bias has gone unchallenged for too long.", + "length": 46 + }, + { + "text": "” And when they said “yeah”, accepting it.", + "length": 48 + }, + { + "text": "‘We didn’t interrogate immigration rigorously enough.", + "length": 57 + }, + { + "text": "to pass through before it passed under his nose,' he said.", + "length": 58 + }, + { + "text": "He added: ‘It was and still is relentlessly middle class.", + "length": 59 + }, + { + "text": "There was a predominant voice – the liberal Oxbridge male.", + "length": 60 + }, + { + "text": "'Now it seems to me there are more bureaucrats than anything else.", + "length": 66 + }, + { + "text": "They were either very good journalists or very good at showbusiness.", + "length": 68 + }, + { + "text": "This year former Newsnight host Jeremy Paxman accused the BBC of being ‘smug’.", + "length": 82 + }, + { + "text": "The Chancellor said the BBC coverage was ‘nonsense’ and had been ‘hyperbolic’.", + "length": 86 + }, + { + "text": "George Osborne hit out at a one of its reports, which said his reforms were ‘utterly terrifying’.", + "length": 101 + }, + { + "text": "He has also 
said its coverage of climate change ‘abandoned the pretence of impartiality long ago’.", + "length": 102 + }, + { + "text": "Radio 4 presenter John Humphrys said the BBC ignored mass immigration for fears it would be considered racist .", + "length": 111 + }, + { + "text": "'But I’m a BBC man through and through and think it’s the greatest broadcasting system throughout the world.", + "length": 112 + }, + { + "text": "This was a historical issue and we now believe our reporting is in the right place and we cover this complex issue in depth.", + "length": 124 + }, + { + "text": "’ Humphrys is not the first senior BBC figure to criticise the corporation’s failure to challenge Left-wing assumptions.", + "length": 124 + }, + { + "text": "Speaking to the Yorkshire Post newspaper, he said: 'I was very lucky that when I was there it was run by programme makers...", + "length": 125 + }, + { + "text": "Humphrys said: ‘I do remember, vaguely, interviews with ministers at the time and saying, “Are you sure that’s all there’ll be?", + "length": 135 + }, + { + "text": "One of the BBC’s top presenters has admitted that the corporation ignored mass immigration because it feared critics would say it was racist.", + "length": 143 + }, + { + "text": "After more than a million Eastern European immigrants moved to the UK, senior Labour figures eventually admitted the policy had been a huge mistake.", + "length": 148 + }, + { + "text": "’ And another senior executive, current radio chief Helen Boaden, claimed the BBC had a ‘deep liberal bias’ when she became head of news in 2004.", + "length": 151 + }, + { + "text": "’ Sir Michael Parkinson says the BBC has far more bureaucrats than it did during his time at the corporation and that programmes have suffered as a result.", + "length": 157 + }, + { + "text": "' The veteran presenter said that when he was with the BBC he was able to talk ideas over with bosses while having a drink, but added that times have changed.", + "length": 159 + }, 
+ { + "text": "A BBC spokesman said: ‘John Humphrys was merely echoing other senior BBC figures who have acknowledged that we were slow to reflect changing opinions on immigration.", + "length": 167 + }, + { + "text": "’ He said the BBC was ‘frightened of appearing racist’, adding: ‘We were too institutionally nervous of saying, isn’t immigration getting a little bit out of hand?", + "length": 173 + }, + { + "text": "’ Tory MP Conor Burns, who sits on the Commons culture, media and sport committee, said: ‘This is a refreshing outbreak of candour, honesty and rare insight from one of the big beasts of the BBC.", + "length": 199 + }, + { + "text": "The criticism, which is the latest in a string of admissions of Left-wing bias by senior BBC figures, comes weeks after the Government accused the Today programme of misrepresenting its spending cuts.", + "length": 200 + }, + { + "text": "And he said that BBC employees are unable to understand the concerns of ordinary people because they typically lead ‘sheltered’ middle-class lives and are overwhelmingly ‘liberal Oxbridge males’.", + "length": 203 + }, + { + "text": "Radio 4’s Today interviewer John Humphrys accused his employer of being ‘soft’, ‘complacent’ and ‘institutionally nervous’ when it came to tackling the story or questioning multiculturalism.", + "length": 204 + }, + { + "text": "Last year the BBC’s former head of TV news, Roger Mosey, criticised the corporation for shutting out critics of the European Union, saying: ‘On the BBC’s own admission, it did not give enough space to anti-immigration views or EU-withdrawalists.", + "length": 251 + }, + { + "text": "The veteran presenter admitted that the last Labour government’s controversial immigration policy was not sufficiently ‘interrogated’ by the BBC, saying: ‘The Labour government underestimated by a factor of ten the number of people who were going to move from Poland.", + "length": 275 + }, + { + "text": "Now Humphrys, 71, has told the Sunday Times Magazine 
that the corporation is facing an ‘existential crisis greater than it’s ever been’ because ‘people, serious, thoughtful people, talk seriously and thoughtfully about the future of the BBC in a way that they haven’t before’.", + "length": 288 + }, + { + "text": "’ Humphrys claimed he was partly responsible for the BBC’s ‘complacent’ approach towards immigration, because he failed to challenge Labour’s decision to allow migrants from Poland and Hungary to work in Britain from 2004, and the flawed prediction that only 13,000 would arrive.", + "length": 289 + }, + { + "text": "’ He said BBC employees are unable to understand the concerns of ordinary people because they typically have ‘sheltered’ middle-class lives and are overwhelmingly ‘liberal Oxbridge males’ The star went on to say that too many BBC staff were ‘arrogant’ and thought they knew ‘what was best for the country’.", + "length": 324 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7765248715877533 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:47.27252817Z", + "first_section_created": "2025-12-23T09:33:47.272920785Z", + "last_section_published": "2025-12-23T09:33:47.273324Z", + "all_results_received": "2025-12-23T09:33:47.364441728Z", + "output_generated": "2025-12-23T09:33:47.364621334Z", + "total_processing_time_ms": 92, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 91, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:47.272920785Z", + "publish_time": "2025-12-23T09:33:47.273189495Z", + "first_worker_start": "2025-12-23T09:33:47.273796718Z", + "last_worker_end": "2025-12-23T09:33:47.363423Z", + "total_journey_time_ms": 90, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:47.273787618Z", + "start_time": "2025-12-23T09:33:47.27385702Z", + "end_time": "2025-12-23T09:33:47.273964624Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:47.274025Z", + "start_time": "2025-12-23T09:33:47.274183Z", + "end_time": "2025-12-23T09:33:47.363423Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 89 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:47.273710715Z", + "start_time": "2025-12-23T09:33:47.273796718Z", + "end_time": "2025-12-23T09:33:47.273890622Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:47.273828619Z", + "start_time": "2025-12-23T09:33:47.273926023Z", + "end_time": "2025-12-23T09:33:47.274005026Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:47.273227797Z", + "publish_time": "2025-12-23T09:33:47.273324Z", + "first_worker_start": "2025-12-23T09:33:47.273875021Z", + "last_worker_end": "2025-12-23T09:33:47.313498Z", + "total_journey_time_ms": 40, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:47.27383942Z", + "start_time": "2025-12-23T09:33:47.273882121Z", + "end_time": "2025-12-23T09:33:47.273888021Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:47.274059Z", + "start_time": "2025-12-23T09:33:47.274194Z", + "end_time": "2025-12-23T09:33:47.313498Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 39 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:47.273897022Z", + "start_time": "2025-12-23T09:33:47.273923623Z", + "end_time": "2025-12-23T09:33:47.273932823Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:47.27383662Z", + "start_time": "2025-12-23T09:33:47.273875021Z", 
+ "end_time": "2025-12-23T09:33:47.273878621Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 128, + "min_processing_ms": 39, + "max_processing_ms": 89, + "avg_processing_ms": 64, + "median_processing_ms": 89, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2586, + "slowest_section_id": 0, + "slowest_section_time_ms": 90 + } +} diff --git a/data/output/003477088ee670dee05e53d0b24ebfcf9a692cec.json b/data/output/003477088ee670dee05e53d0b24ebfcf9a692cec.json new file mode 100644 index 0000000..67eba6f --- /dev/null +++ b/data/output/003477088ee670dee05e53d0b24ebfcf9a692cec.json @@ -0,0 +1,368 @@ +{ + "file_name": "003477088ee670dee05e53d0b24ebfcf9a692cec.txt", + "total_words": 971, + "top_n_words": [ + { + "word": "to", + "count": 44 + }, + { + "word": "the", + "count": 37 + }, + { + "word": "and", + "count": 27 + }, + { + "word": "of", + "count": 25 + }, + { + "word": "a", + "count": 22 + }, + { + "word": "she", + "count": 22 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "said", + 
"count": 17 + }, + { + "word": "that", + "count": 13 + }, + { + "word": "we", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "We still have a long way to go yet.", + "length": 35 + }, + { + "text": "And why was the impact so traumatic?", + "length": 36 + }, + { + "text": "\"No one wants to think of their sons as rapists.", + "length": 48 + }, + { + "text": "\"I do see reduction of shame, which is very good.", + "length": 49 + }, + { + "text": "That's the reason many victims don't come forward.", + "length": 50 + }, + { + "text": "Didn't she want the attention of one of the popular boys?", + "length": 57 + }, + { + "text": "I'm optimistic that we're heading in a better direction as a culture.", + "length": 70 + }, + { + "text": "\" She didn't set out to be a public touchstone in the genre, she said.", + "length": 70 + }, + { + "text": "calls it \"resilience literature,\" a term Anderson said she is proud of.", + "length": 71 + }, + { + "text": "\" The term \"young adult lit\" was hardly in use when Anderson wrote \"Speak.", + "length": 74 + }, + { + "text": "Talking to teens about sexuality, intimacy and consent is urgent, she said.", + "length": 75 + }, + { + "text": "I think we're all trying to find the right language surrounding sexual assault.", + "length": 79 + }, + { + "text": "Anderson said she wrote \"Speak\" based on her own experience of being raped as a teen.", + "length": 85 + }, + { + "text": "She struggled for years to find the words or the courage to express what she'd gone through.", + "length": 92 + }, + { + "text": "Over the years, more resources have emerged for survivors of sexual violence, especially online.", + "length": 96 + }, + { + "text": "\"If you find someone in a book, you know you're not alone and that's what's so comforting about books.", + "length": 102 + }, + { + "text": "Anderson, who published the award-winning novel in 1999, believes the questions come from an honest place.", + "length": 106 + }, + { + "text": "That 
feeling is understandable, but it's why we still need to do more to reduce the stigma around rape,\" she said.", + "length": 114 + }, + { + "text": "Much has changed since then, she said, and \"Speak\" has become required reading in some schools across the country.", + "length": 114 + }, + { + "text": "\"That can be the most painstaking aspect of being a teen, figuring out what the world really looks like,\" she said.", + "length": 115 + }, + { + "text": "Although many students first encountered \"Speak\" in high schools, it's now being taught in middle schools, Anderson said.", + "length": 121 + }, + { + "text": "And yet, many parents still struggle to find the words or the courage to talk to teens about sex and intimacy, Anderson said.", + "length": 125 + }, + { + "text": "Scores of students still describe the same struggles to Anderson, and she often directs them to the RAINN hot line, she said.", + "length": 125 + }, + { + "text": "\"So many teens out there are operating in a vacuum, they're operating in adult situations without any adult support or advice.", + "length": 126 + }, + { + "text": "Macmillan, the publisher of \"Speak,\" is matching donations to the organization in April, which is Sexual Assault Awareness Month.", + "length": 129 + }, + { + "text": "They're not used to reading novels that feature characters like Melinda Sordino, a teen who is raped by a classmate at a house party.", + "length": 133 + }, + { + "text": "They're teen boys, after all, growing up in a society where media and pop culture tell them women are created for sexual gratification.", + "length": 135 + }, + { + "text": "She believes parents can be more involved, too; just take a deep breath and commit to talking about sex and what constitutes consent, she said.", + "length": 143 + }, + { + "text": "While the Internet brings people together and creates supportive communities, it has also become the source of damaging images and intense bullying, she said.", + "length": 158 + }, + { 
+ "text": "\"Speak\" is about teen rape, the pressures of high school and the insularity of small-town life, but most importantly, it's about overcoming stigma, Anderson said.", + "length": 162 + }, + { + "text": "\" For the 15th anniversary of \"Speak,\" Anderson is lending her support to the Rape, Abuse \u0026 Incest National Network, a resource for survivors of sexual violence.", + "length": 162 + }, + { + "text": "\"We are a culture who is right now in 2014 finally having the conversation that it actually doesn't matter what a woman is wearing, you're not supposed to rape her.", + "length": 164 + }, + { + "text": "She thinks it's an acknowledgment that sex education needs to start earlier if we want to help teens feel comfortable talking openly about sex and what feels right and wrong.", + "length": 174 + }, + { + "text": "\"It used to be that we teach girls not to be raped, but we need to start teaching boys not to be rapists, and that's a really hard thing for parents of boys to process,\" she said.", + "length": 179 + }, + { + "text": "Today, if you're a victim of any crime, including sexual violence, you can go online and \"find someone who's walked in your shoes who can help you make sense of what happened,\" she said.", + "length": 186 + }, + { + "text": "As her classmates and neighbors go to great lengths to protect her attacker, Melinda plunges into near-silence, refusing to say what happened while still feeling ostracized by her classmates.", + "length": 191 + }, + { + "text": "\"We as a culture are still figuring out how to teach our children the awesome parts of the Internet and cell phones and new media, but we also have to figure out how to keep them safe,\" she said.", + "length": 195 + }, + { + "text": "As a mother who raised four girls, Anderson knows that parents today are navigating uncharted territory when it comes to adolescent sexuality, and they're doing it earlier than parents in other generations.", + "length": 206 + }, + { + "text": 
"Fifteen years after its publication, society has shed some of the stigma associated with sexual violence, but the conflict at the heart of \"Speak\" still shows up in headlines, from Steubenville, Ohio, to Maryville, Missouri.", + "length": 224 + }, + { + "text": "During 15 years of talking to high school students about sex and bullying, Laurie Halse Anderson has continued to get the same questions from boys: Why was the main character in her book, \"Speak,\" so upset about what happened to her?", + "length": 233 + }, + { + "text": "\"We've fallen down on our responsibility to our children by somehow creating this world where they're surrounded by images of sexuality; and yet, we as adults struggle to talk to kids honestly about sex, the rules of dignity and consent,\" she said.", + "length": 248 + }, + { + "text": "\"Because boys and girls can be victims of rape, we need to try to teach them to make decisions about life that keep them safe, sober and with people they can trust, and make sure people who might be inclined to rape -- who think they can get away with it -- know they can't get away from it.", + "length": 291 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5171849429607391 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:47.774102536Z", + "first_section_created": "2025-12-23T09:33:47.77446235Z", + "last_section_published": "2025-12-23T09:33:47.774838064Z", + "all_results_received": "2025-12-23T09:33:47.870368257Z", + "output_generated": "2025-12-23T09:33:47.870555464Z", + "total_processing_time_ms": 96, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 95, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:47.77446235Z", + "publish_time": "2025-12-23T09:33:47.77473956Z", + "first_worker_start": "2025-12-23T09:33:47.775281381Z", + "last_worker_end": "2025-12-23T09:33:47.86952Z", + "total_journey_time_ms": 95, + 
"worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:47.775450387Z", + "start_time": "2025-12-23T09:33:47.77552809Z", + "end_time": "2025-12-23T09:33:47.775686596Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:47.775623Z", + "start_time": "2025-12-23T09:33:47.775778Z", + "end_time": "2025-12-23T09:33:47.86952Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 93 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:47.775636394Z", + "start_time": "2025-12-23T09:33:47.775772099Z", + "end_time": "2025-12-23T09:33:47.775989007Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:47.775201878Z", + "start_time": "2025-12-23T09:33:47.775281381Z", + "end_time": "2025-12-23T09:33:47.775385185Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:47.774781862Z", + "publish_time": "2025-12-23T09:33:47.774838064Z", + "first_worker_start": "2025-12-23T09:33:47.775365684Z", + "last_worker_end": "2025-12-23T09:33:47.849031Z", + "total_journey_time_ms": 74, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:47.775581892Z", + "start_time": "2025-12-23T09:33:47.775840302Z", + "end_time": "2025-12-23T09:33:47.775870003Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:47.775633Z", + "start_time": "2025-12-23T09:33:47.775787Z", + "end_time": "2025-12-23T09:33:47.849031Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 73 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:47.775332583Z", + "start_time": "2025-12-23T09:33:47.775378584Z", + "end_time": 
"2025-12-23T09:33:47.775397685Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:47.775328483Z", + "start_time": "2025-12-23T09:33:47.775365684Z", + "end_time": "2025-12-23T09:33:47.775375184Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 166, + "min_processing_ms": 73, + "max_processing_ms": 93, + "avg_processing_ms": 83, + "median_processing_ms": 93, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2726, + "slowest_section_id": 0, + "slowest_section_time_ms": 95 + } +} diff --git a/data/output/003482574b186d3ecda90b5db007bd55c718966c.json b/data/output/003482574b186d3ecda90b5db007bd55c718966c.json new file mode 100644 index 0000000..d7927d0 --- /dev/null +++ b/data/output/003482574b186d3ecda90b5db007bd55c718966c.json @@ -0,0 +1,274 @@ +{ + "file_name": "003482574b186d3ecda90b5db007bd55c718966c.txt", + "total_words": 447, + "top_n_words": [ + { + "word": "the", + "count": 32 + }, + { + "word": "galaxy", 
+ "count": 17 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "is", + "count": 11 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "years", + "count": 9 + }, + { + "word": "light", + "count": 7 + }, + { + "word": "with", + "count": 7 + }, + { + "word": "and", + "count": 6 + }, + { + "word": "in", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "06:09 EST, 16 November 2012 .", + "length": 29 + }, + { + "text": "07:11 EST, 16 November 2012 .", + "length": 29 + }, + { + "text": "3 billion light-years from Earth.", + "length": 33 + }, + { + "text": "Astronomers have calculated the galaxy is a 13.", + "length": 47 + }, + { + "text": "1 - 1 percent the mass of our Milky Way’s stars.", + "length": 50 + }, + { + "text": "“The science output in this regard has been incredible.", + "length": 57 + }, + { + "text": "Dan Coe, from the Space Telescope Science Institute, said.", + "length": 58 + }, + { + "text": "For comparison the Milky Way is 150 000 light-years across.", + "length": 59 + }, + { + "text": "'This object may be one of many building blocks of a galaxy.", + "length": 60 + }, + { + "text": "The object was observed 420 million years after the big bang .", + "length": 62 + }, + { + "text": "' Galaxy Cluster MACS J0647, where the latest galaxy was spotted .", + "length": 66 + }, + { + "text": "Researcher have identified the furthest ever galaxy discovered in space - a staggering 13.", + "length": 90 + }, + { + "text": "The estimated mass of this baby galaxy is roughly equal to 100 million or a billion suns, or 0.", + "length": 95 + }, + { + "text": "3 billion light-years from Earth with a single light-year representing 5,878,625 million miles.", + "length": 95 + }, + { + "text": "The newly discovered galaxy, named MACS0647-JD, is very young and 
only a tiny fraction of the size of our Milky Way.", + "length": 116 + }, + { + "text": "The galaxy was observed around 420 million years after the Big Bang when the universe was just 3 per cent of its current age.", + "length": 125 + }, + { + "text": "Our galaxy, the Milky Way, is 150,000 light-years across with the Solar System a third of the age of the newly discovered galaxy.", + "length": 129 + }, + { + "text": "Scientists say the object is in the first stages of galaxy formation with analysis showing it is less than 600 light-years across.", + "length": 130 + }, + { + "text": "It was spotted using NASA’s Hubble Space Telescope, Spitzer Space Telescope, and one of nature’s own natural 'zoom lenses' in space.", + "length": 136 + }, + { + "text": "'Over the next 13 billion years, it may have dozens, hundreds, or even thousands of merging events with other galaxies and galaxy fragments.", + "length": 140 + }, + { + "text": "The object is so small it may be in the first stages of galaxy formation, with analysis showing the galaxy is less than 600 light-years across.", + "length": 143 + }, + { + "text": "The object, named MACS0647-JD, is the latest discovery from a programme which uses natural zoom lenses to reveal distant galaxies in the early universe.", + "length": 152 + }, + { + "text": "The Cluster Lensing And Supernova survey with Hubble (CLASH) is using massive galaxy clusters as cosmic telescopes to magnify distant galaxies behind them, an effect called gravitational lensing.", + "length": 195 + }, + { + "text": "Coe and his collaborators spent months ruling out alternative explanations for the object’s identity - such as red stars, brown dwarfs, and red galaxies - to conclude it was a very distant galaxy.", + "length": 198 + }, + { + "text": "Rychard Bouwens, from Leiden University, Holland, said: 'While one occasionally expects to find an extremely distant galaxy using the tremendous power of gravitational lensing, this latest discovery has outstripped 
even my expectations of what would be possible with the CLASH program.", + "length": 285 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.6709755659103394 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:48.2756125Z", + "first_section_created": "2025-12-23T09:33:48.275901011Z", + "last_section_published": "2025-12-23T09:33:48.276100519Z", + "all_results_received": "2025-12-23T09:33:48.34446799Z", + "output_generated": "2025-12-23T09:33:48.344633696Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:48.275901011Z", + "publish_time": "2025-12-23T09:33:48.276100519Z", + "first_worker_start": "2025-12-23T09:33:48.27667874Z", + "last_worker_end": "2025-12-23T09:33:48.342382Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:48.276684341Z", + "start_time": "2025-12-23T09:33:48.276739743Z", + "end_time": "2025-12-23T09:33:48.276812845Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:48.276893Z", + "start_time": "2025-12-23T09:33:48.277038Z", + "end_time": "2025-12-23T09:33:48.342382Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:48.276606538Z", + "start_time": "2025-12-23T09:33:48.27668334Z", + "end_time": "2025-12-23T09:33:48.276771744Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:48.276594237Z", + "start_time": "2025-12-23T09:33:48.27667874Z", + "end_time": "2025-12-23T09:33:48.276719742Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": 
{ + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2651, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/00349f3102caa59131477f705683fc197de5cb89.json b/data/output/00349f3102caa59131477f705683fc197de5cb89.json new file mode 100644 index 0000000..2180297 --- /dev/null +++ b/data/output/00349f3102caa59131477f705683fc197de5cb89.json @@ -0,0 +1,246 @@ +{ + "file_name": "00349f3102caa59131477f705683fc197de5cb89.txt", + "total_words": 476, + "top_n_words": [ + { + "word": "her", + "count": 25 + }, + { + "word": "the", + "count": 22 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "to", + "count": 14 + }, + { + "word": "with", + "count": 14 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "for", + "count": 13 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "kim", + "count": 9 + }, + { + "word": "s", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": 
"Scroll down for video .", + "length": 23 + }, + { + "text": "Fleur Fatale is the unavoidable temptation.", + "length": 43 + }, + { + "text": "This one, called Fleur Fatale, aims to reflect her love of roses .", + "length": 66 + }, + { + "text": "The bottle has been designed with Kim's love of flowers, particularly roses, in mind.", + "length": 85 + }, + { + "text": "The new scent, called Fleur Fatale, is described as a 'beautifully floral and elegant scent'.", + "length": 93 + }, + { + "text": "The 37-year-old rapper whisked the reality star off to Maui to ring in her 34th year together.", + "length": 94 + }, + { + "text": "And it seems that the reality TV star and entrepreneur is setting a good example for little North.", + "length": 98 + }, + { + "text": "It's also packed with floral notes such as iris and peony combined with white musk and sandalwood.", + "length": 98 + }, + { + "text": "The bottle comes with a high gloss finish, accented with touches of rose gold - only the best for Kim.", + "length": 102 + }, + { + "text": "Romance: Kanye West has reportedly whisked the reality star off to Maui to ring in her 34th year together .", + "length": 107 + }, + { + "text": "Entrepreneur: There's just no stopping businesswoman Kim Kardashian, who has released her seventh fragrance.", + "length": 108 + }, + { + "text": "But luckily for the workaholic, her beau Kanye organised a birthday surprise for her - a romantic trip to Hawaii.", + "length": 113 + }, + { + "text": "' It's been a busy few weeks for Kim, who is preparing to launch her new line with her sisters for Lipsy next week.", + "length": 115 + }, + { + "text": "Kim Kardashian yesterday revealed that her 16-month-old daughter with Kanye West will have to pay her own way in life - just like she did.", + "length": 138 + }, + { + "text": "Of course, Kim couldn't resist sharing a scenic holiday snap with her Twitter followers along with the caption: 'Good morning #BdaySurpriseGettaway.", + "length": 148 + }, + { + 
"text": "' While she didn't disclose the location, Popdust reports the couple were spotted having breakfast together at the Four Seasons Resort Maui at Wailea.", + "length": 150 + }, + { + "text": "Describing the perfume's creator, the release adds: 'There is something dangerously alluring about Kim, a magnetic energy that lures people into her world.", + "length": 155 + }, + { + "text": "The £24 ($38) fragrance apparently takes a different turn from her past fragrances and opens with fresh and crisp notes of blackcurrant, bergamot and violet.", + "length": 158 + }, + { + "text": "'With Kim's life continuing to blossom into the next chapter, it's time to plant a new seed with a seventh scent to add to her growing portfolio of successful fragrances,' reads the release.", + "length": 190 + }, + { + "text": "On top of filming for her hit show, designing her clothing range for Lipsy, creating her debut haircare range and penning her soon-to-launch coffee table book, the businesswoman has unveiled her seventh fragrance.", + "length": 213 + }, + { + "text": "Keeping up with Kim: On top of filming for her hit show, designing her clothing range for Lipsy, creating her debut haircare range and penning her soon-to-launch coffee table book, the businesswoman has unveiled her seventh fragrance .", + "length": 236 + }, + { + "text": "Sweet smell of success: The fragrance apparently takes a different turn from her past fragrances and opens with fresh and crisp notes of blackcurrant, bergamot and violet and ranges in price between £24 ($38) for a 30ml bottle and £40 ($64) for 100ml .", + "length": 254 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.45300596952438354 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:48.776847754Z", + "first_section_created": "2025-12-23T09:33:48.778678122Z", + "last_section_published": "2025-12-23T09:33:48.778862429Z", + "all_results_received": "2025-12-23T09:33:48.841309978Z", + 
"output_generated": "2025-12-23T09:33:48.841469984Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:48.778678122Z", + "publish_time": "2025-12-23T09:33:48.778862429Z", + "first_worker_start": "2025-12-23T09:33:48.779434151Z", + "last_worker_end": "2025-12-23T09:33:48.84042Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:48.779453952Z", + "start_time": "2025-12-23T09:33:48.779529854Z", + "end_time": "2025-12-23T09:33:48.779596057Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:48.779696Z", + "start_time": "2025-12-23T09:33:48.779847Z", + "end_time": "2025-12-23T09:33:48.84042Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:48.779369548Z", + "start_time": "2025-12-23T09:33:48.779434151Z", + "end_time": "2025-12-23T09:33:48.779500453Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:48.779386149Z", + "start_time": "2025-12-23T09:33:48.779454352Z", + "end_time": "2025-12-23T09:33:48.779498453Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + 
"median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2806, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/0034ac0b89b8a5a315b20dc6e072dc6b53721d52.json b/data/output/0034ac0b89b8a5a315b20dc6e072dc6b53721d52.json new file mode 100644 index 0000000..713add6 --- /dev/null +++ b/data/output/0034ac0b89b8a5a315b20dc6e072dc6b53721d52.json @@ -0,0 +1,498 @@ +{ + "file_name": "0034ac0b89b8a5a315b20dc6e072dc6b53721d52.txt", + "total_words": 852, + "top_n_words": [ + { + "word": "the", + "count": 45 + }, + { + "word": "her", + "count": 29 + }, + { + "word": "she", + "count": 29 + }, + { + "word": "to", + "count": 27 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "show", + "count": 16 + }, + { + "word": "for", + "count": 15 + } + ], + "sorted_sentences": [ + { + "text": "76.", + "length": 3 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "'I .", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "But .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "shows.", + "length": 6 + }, + { + "text": "again.", + "length": 6 + }, + { + "text": "'They .", + "length": 7 + }, + { + 
"text": "' She .", + "length": 7 + }, + { + "text": "' She .", + "length": 7 + }, + { + "text": "Prime .", + "length": 7 + }, + { + "text": "' And .", + "length": 7 + }, + { + "text": "' She .", + "length": 7 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "not attend.", + "length": 11 + }, + { + "text": "to be too'.", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "politician...", + "length": 13 + }, + { + "text": "57 to charity.", + "length": 14 + }, + { + "text": "Larisa Brown .", + "length": 14 + }, + { + "text": "In the world of .", + "length": 17 + }, + { + "text": "messaging, it's huge.", + "length": 21 + }, + { + "text": "party while she is away.", + "length": 24 + }, + { + "text": "justified her decision, .", + "length": 25 + }, + { + "text": "be away for up to a month.", + "length": 26 + }, + { + "text": "politicians need to be too.", + "length": 27 + }, + { + "text": "taking part in the ITV show.", + "length": 28 + }, + { + "text": "04:51 EST, 12 November 2012 .", + "length": 29 + }, + { + "text": "05:49 EST, 12 November 2012 .", + "length": 29 + }, + { + "text": "It would have been mad to have refused...", + "length": 41 + }, + { + "text": "Here she talks politics with Hugo Taylor .", + "length": 42 + }, + { + "text": "Mrs Dorries said that while she may have .", + "length": 42 + }, + { + "text": "abortion time limits, or to 'big up Boris'.", + "length": 43 + }, + { + "text": "newspapers, as the distribution figures show us.", + "length": 48 + }, + { + "text": "talking to each other and more time talking to people.", + "length": 54 + }, + { + "text": "show and 12 million per show is a very large audience.", + "length": 54 + }, + { + "text": "I believe that we politicians need to spend less time .", + "length": 55 + }, + { + "text": "If that is where sixteen million people are, it's where .", + "length": 57 + }, + { + "text": "It's not really very different from Westminster after all.", + "length": 
58 + }, + { + "text": "went into politics for reasons of deep belief and principle.", + "length": 60 + }, + { + "text": "on the programme to children's charities in her constituency.", + "length": 61 + }, + { + "text": "She says she 'seized' upon the opportunity, and 'who wouldn't?", + "length": 62 + }, + { + "text": "In her column she would donate her MP's salary during her time .", + "length": 64 + }, + { + "text": "majority of people don't look to Westminster and they don't buy .", + "length": 65 + }, + { + "text": "think many may have guessed that I am a bit of an anti-politics .", + "length": 65 + }, + { + "text": "do however surf the net, watch popular TV and engage with reality .", + "length": 67 + }, + { + "text": "That some of us politicians come from very normal backgrounds and .", + "length": 67 + }, + { + "text": "the start of filming for the show and imposed an embargo for it to .", + "length": 68 + }, + { + "text": "continued: 'An audience of 16 million people for the first and last .", + "length": 69 + }, + { + "text": "for the Australian reality TV jungle, and has been suspended by her .", + "length": 69 + }, + { + "text": "in a typically acerbic parting shot she said: 'Whilst the half term .", + "length": 69 + }, + { + "text": "appear in her Conservativehome website column after the show went live.", + "length": 71 + }, + { + "text": "British Legion in her constituency told ITV it received a letter from .", + "length": 71 + }, + { + "text": "for very long but I hope I can do something to make some people think .", + "length": 71 + }, + { + "text": "added: 'MPs are not popular and so I don't expect to be in the jungle .", + "length": 71 + }, + { + "text": "arguing that 'more people watch the X- Factor final than voted in the .", + "length": 71 + }, + { + "text": "recess is under way, I will be working with rats and snakes in a jungle.", + "length": 72 + }, + { + "text": "was published yesterday - that she had told the Whips Office she would .", + 
"length": 72 + }, + { + "text": "Young to suspend her and said Mrs Dorries had not informed him she was .", + "length": 72 + }, + { + "text": "MP has drawn widespread criticism for swapping her parliamentary duties .", + "length": 73 + }, + { + "text": "Minister David Cameron has backed the decision by Chief Whip Sir George .", + "length": 73 + }, + { + "text": "She made the extraordinary comments before her laptop was taken away at .", + "length": 73 + }, + { + "text": "Mrs Dorries insisted in her column - which she submitted a week ago and .", + "length": 73 + }, + { + "text": "to eat a kangaroo's testicle she may also have the chance to talk about .", + "length": 73 + }, + { + "text": "the MP in the Autumn, saying she would be 'out of the country' and could .", + "length": 74 + }, + { + "text": "general election', so the realm of reality TV is 'where politicians need .", + "length": 74 + }, + { + "text": "Nadine Dorries looks worn out after her first appearance on I'm A Celebrity...", + "length": 78 + }, + { + "text": "Nadine Dorries says she 'seized' upon the opportunity to take part on the show.", + "length": 79 + }, + { + "text": "Mrs Dorries did not mention what she intends to do with her fee from ITV's reality show.", + "length": 88 + }, + { + "text": "Under-fire MP Nadine Dorries has defended her controversial decision to go on I'm A Celebrity...", + "length": 97 + }, + { + "text": "Get Me Out of Here - claiming she will donate her MPs' salary during her time on the show to charity.", + "length": 101 + }, + { + "text": "She says in her blog that being on the show is a 'publicity gift' and a way of communicating with 16 million people.", + "length": 116 + }, + { + "text": "Despite the furore, Mrs Dorries seems to believe appearing on the show will only enhance her career as a politician.", + "length": 116 + }, + { + "text": "With an MPs' salary of £65,738 per year, if she lasts the full three weeks on the show, she will donate a mere £3,738.", + "length": 
120 + }, + { + "text": "But Mrs Dorries has accounted for four weeks leave from her constituency - which would increase her donation to £5,056.", + "length": 120 + }, + { + "text": "She is receiving a reported £40,000 fee for taking part - nearly eight times the maximum amount of money she will donate.", + "length": 122 + }, + { + "text": "She wrote: 'Do people understand why I am in a jungle, eating only three handfuls of rice a day with a few beans thrown in?", + "length": 123 + }, + { + "text": "' The politician was accused of snubbing a Remembrance Sunday service yesterday in her constituency by taking part in the show.", + "length": 127 + }, + { + "text": "The news comes after her Mid-Bedfordshire constituents were furious at the prospect of her abandoning her post for up to four weeks on full pay.", + "length": 144 + }, + { + "text": "If Mrs Dorries, pictured on the show, will donate £5056,76 to charity accounting for four weeks away from her constituency to be in Australia .", + "length": 144 + }, + { + "text": "Get Me Out Of Here yesterday - coinciding with the publication of her column revealed she will donate her MPs' salary for the time she is on the show to charity .", + "length": 162 + }, + { + "text": "Tory MP Nadine Dorries has drawn widespread criticism for swapping her parliamentary duties for the Australian reality TV jungle, where she is pictured on the show, right .", + "length": 172 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4323386549949646 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:49.279631865Z", + "first_section_created": "2025-12-23T09:33:49.280006579Z", + "last_section_published": "2025-12-23T09:33:49.280307191Z", + "all_results_received": "2025-12-23T09:33:49.34835295Z", + "output_generated": "2025-12-23T09:33:49.348524757Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + 
"section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:49.280006579Z", + "publish_time": "2025-12-23T09:33:49.280307191Z", + "first_worker_start": "2025-12-23T09:33:49.280759908Z", + "last_worker_end": "2025-12-23T09:33:49.347374Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:49.280668304Z", + "start_time": "2025-12-23T09:33:49.280759908Z", + "end_time": "2025-12-23T09:33:49.280876012Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:49.280955Z", + "start_time": "2025-12-23T09:33:49.281099Z", + "end_time": "2025-12-23T09:33:49.347374Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:49.280780608Z", + "start_time": "2025-12-23T09:33:49.280900513Z", + "end_time": "2025-12-23T09:33:49.281115321Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:49.280665504Z", + "start_time": "2025-12-23T09:33:49.280760908Z", + "end_time": "2025-12-23T09:33:49.280846711Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4748, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/00350f70d66d7451f8f18ede6f865996384a539c.json b/data/output/00350f70d66d7451f8f18ede6f865996384a539c.json new file mode 100644 index 0000000..f62e869 --- /dev/null +++ b/data/output/00350f70d66d7451f8f18ede6f865996384a539c.json @@ -0,0 +1,428 @@ +{ + "file_name": "00350f70d66d7451f8f18ede6f865996384a539c.txt", + "total_words": 1148, + "top_n_words": [ + { + "word": "the", + "count": 87 + }, + { + "word": "to", + "count": 42 + }, + { + "word": "a", + "count": 37 + }, + { + "word": "in", + "count": 34 + }, + { + "word": "of", + "count": 27 + }, + { + "word": "be", + "count": 25 + }, + { + "word": "would", + "count": 25 + }, + { + "word": "and", + "count": 21 + }, + { + "word": "shares", + "count": 21 + }, + { + "word": "price", + "count": 15 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "A .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "sold.", + "length": 5 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "19:24 EST, 9 June 2013 .", + "length": 24 + }, + { + "text": "06:26 EST, 10 June 2013 .", + "length": 25 + }, + { + "text": "James Chapman, Political Editor .", + "length": 33 + }, + { + "text": "in private hands,’ Mr Barry said.", + "length": 35 + }, + { + "text": "The Treasury is considering the plan.", + "length": 37 + }, + { + "text": "The shares would be 
held in a nominee account.", + "length": 46 + }, + { + "text": "They would only be paid for at the time of sale.", + "length": 48 + }, + { + "text": "to want to sell since they would not profit at all.", + "length": 51 + }, + { + "text": "One described it privately as ‘electoral catnip’.", + "length": 53 + }, + { + "text": "They would keep the profits when the shares were sold.", + "length": 54 + }, + { + "text": "One senior Tory described the move as 'electoral catnip'.", + "length": 57 + }, + { + "text": "This proposal will create a whole new generation of shareholders.", + "length": 65 + }, + { + "text": "If the share price falls under the floor, no-one would be expected .", + "length": 68 + }, + { + "text": "’ Labour had indicated it would fight a full-scale shares giveaway.", + "length": 69 + }, + { + "text": "floor price would be established at the original level the shares are .", + "length": 71 + }, + { + "text": "An alternative plan is for people to be able to buy shares at a discount.", + "length": 73 + }, + { + "text": "We urge the Chancellor to take on the doubters and move ahead with this scheme.", + "length": 79 + }, + { + "text": "It is said that the plan would be billed as 'payback time' from the banking sector .", + "length": 84 + }, + { + "text": "It is also key that the taxpayer still benefits from any further rise in the share price.", + "length": 89 + }, + { + "text": "George Osborne is said to believe a privatisation of this kind would excite the electorate .", + "length": 92 + }, + { + "text": "This gives taxpayers confidence in taking on shares as there is no downside or upfront cost.", + "length": 92 + }, + { + "text": "’ The two banks have both said they expect to be ready to return to the private sector in 2014.", + "length": 97 + }, + { + "text": "The Treasury could raise approximately £17-18 billion through institutional and retail sales alone.", + "length": 100 + }, + { + "text": "The £1,500 no-risk stake in RBS and Lloyds would be the 
biggest privatisation ever seen in Britain .", + "length": 101 + }, + { + "text": "The report estimates that between 20 and 30 million would apply, meaning a likely stake of around £1,500.", + "length": 106 + }, + { + "text": "A simple shares giveaway would increase the national debt by £50 billion and wad therefore a ‘non-starter’.", + "length": 112 + }, + { + "text": "30 per cent of shares in Lloyds would be distributed to taxpayers who are able to apply for them at no initial cost .", + "length": 117 + }, + { + "text": "If the share price never exceeds the floor price, the shares would be returned to government ownership after ten years.", + "length": 119 + }, + { + "text": "The Treasury source added: ‘We genuinely don’t have some secret plan for bank share sales and are not in any mad rush.", + "length": 122 + }, + { + "text": "For taxpayers, this could mean between £1,100 and £1,650 worth of shares being allocated depending on the number of applicants.", + "length": 129 + }, + { + "text": "It also offers the most stable share price as distributed shares would not be sold below the price to be repaid to the government.", + "length": 130 + }, + { + "text": "The scheme has been devised by centre-Right think tank Policy Exchange and is understood to be being studied closely in the Treasury.", + "length": 133 + }, + { + "text": "Up to 48 million taxpayers could be offered a £1,500 no-risk stake in RBS and Lloyds in the biggest ever privatisation ever seen in Britain.", + "length": 141 + }, + { + "text": "Every British resident over the age of 18 who has a National Insurance number and is registered on the electoral roll could apply for a share.", + "length": 142 + }, + { + "text": "An announcement of plans for a share giveaway could come in Chancellor George Osborne’s speech to London’s Mansion House in ten days’ time.", + "length": 145 + }, + { + "text": "Individuals can then transfer their shareholding into a personal account should they wish, providing they have 
paid the government the floor price.", + "length": 147 + }, + { + "text": "A senior Treasury source confirmed the Policy Exchange proposal was ‘one interesting idea among many’, though he insisted no decisions had been taken.", + "length": 154 + }, + { + "text": "At the same time of the mass distribution, 25 per cent of shares in RBS and the remainding shares in Lloyds would be sold to institutional and retail investors.", + "length": 160 + }, + { + "text": "It would mean 50 to 55 per cent of shares in RBS and 30 per cent of shares in Lloyds being distributed to taxpayers who are able to apply for them at no initial cost.", + "length": 166 + }, + { + "text": "A blueprint published today suggests that up to £34billion of the government’s £48billion of shares in RBS and Lloyds would end up in the hands of individual voters.", + "length": 169 + }, + { + "text": "Instead, taxpayers would take the profits from any rise in the share price above the floor price but would not lose any money if the share price dropped below the floor price.", + "length": 175 + }, + { + "text": "Senior Tories believe the move will be a moment of huge political significance as it will be billed ‘payback time’ from the banking sector, as well as creating a new generation of shareowners.", + "length": 196 + }, + { + "text": "‘A giveaway or a loss-making firesale at the current share price would add billions to the national debt at a time when poor economic growth already means borrowing isn’t coming down,’ he said.", + "length": 199 + }, + { + "text": "Shadow chancellor Ed Balls declared last month that he would oppose the coalition’s moves to divest itself of the shares currently held by the government if it would adversely affect Britain’s deficit.", + "length": 205 + }, + { + "text": "Today’s report suggests that every taxpayer should be eligible to apply to be handed a ‘free’ stake in the two banks, which were part-nationalised by Gordon Brown in the banking crash of 2008 and 2009.", + 
"length": 207 + }, + { + "text": "‘In our view that means finding a solution that moves the banks quickly from the public to the private sector, while at the same time generating a stable share price and an opportunity for the banks to raise capital.", + "length": 218 + }, + { + "text": "It identified flaws in other options to privatise the banks before the general election in 2015, saying there was not enough time for a stage sale to institutional investors and the shares would have to be sold at a discount.", + "length": 225 + }, + { + "text": "‘A distribution to taxpayers with the government to be repaid on sale, combined with an institutional and retail placing, is the only option that allows almost all the government’s stake in the banks to be sold ahead of a 2015 election.", + "length": 240 + }, + { + "text": "Mr Osborne is said to believe a privatisation with discounted or given-away shares could rekindle the excitement of the ‘Tell Sid’ campaign, in which five million people bought shares in British Gas -- a move which is credited with helping the Tories win the 1987 general election.", + "length": 285 + }, + { + "text": "The report says that by offering a large portion of its shareholding to taxpayers in this way, the government would be able to move much if not all of the banks into private ownership in one go, at a better price than through a traditional sale, while at the same time giving the taxpayer the bulk of the upside.", + "length": 312 + }, + { + "text": "Today’s report, written by James Barty, former head of global equity strategy at Deutsche Bank, examines all the options available to the government and concludes that a distribution of shares in both RBS and LLoyds to taxpayers be repaid on sale, combined with a separate sale to institutional investors, is the best solution.", + "length": 329 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4652225375175476 + }, + "name_replaced": false, + "performance": { + 
"file_detection_time": "2025-12-23T09:33:49.78115893Z", + "first_section_created": "2025-12-23T09:33:49.783011999Z", + "last_section_published": "2025-12-23T09:33:49.783448116Z", + "all_results_received": "2025-12-23T09:33:49.881791215Z", + "output_generated": "2025-12-23T09:33:49.882002623Z", + "total_processing_time_ms": 100, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 98, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:49.783011999Z", + "publish_time": "2025-12-23T09:33:49.783246608Z", + "first_worker_start": "2025-12-23T09:33:49.78383783Z", + "last_worker_end": "2025-12-23T09:33:49.880855Z", + "total_journey_time_ms": 97, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:49.78383663Z", + "start_time": "2025-12-23T09:33:49.783892632Z", + "end_time": "2025-12-23T09:33:49.783990136Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:49.78422Z", + "start_time": "2025-12-23T09:33:49.784361Z", + "end_time": "2025-12-23T09:33:49.880855Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 96 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:49.783785028Z", + "start_time": "2025-12-23T09:33:49.783859931Z", + "end_time": "2025-12-23T09:33:49.783994336Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:49.783768228Z", + "start_time": "2025-12-23T09:33:49.78383783Z", + "end_time": "2025-12-23T09:33:49.784485755Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:49.783328811Z", + "publish_time": "2025-12-23T09:33:49.783448116Z", + "first_worker_start": "2025-12-23T09:33:49.783895133Z", + "last_worker_end": "2025-12-23T09:33:49.856691Z", + 
"total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:49.784241446Z", + "start_time": "2025-12-23T09:33:49.784279447Z", + "end_time": "2025-12-23T09:33:49.784315448Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:49.784235Z", + "start_time": "2025-12-23T09:33:49.784356Z", + "end_time": "2025-12-23T09:33:49.856691Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:49.784020537Z", + "start_time": "2025-12-23T09:33:49.78410094Z", + "end_time": "2025-12-23T09:33:49.784136542Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:49.783864531Z", + "start_time": "2025-12-23T09:33:49.783895133Z", + "end_time": "2025-12-23T09:33:49.783915633Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 168, + "min_processing_ms": 72, + "max_processing_ms": 96, + "avg_processing_ms": 84, + "median_processing_ms": 96, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3278, + "slowest_section_id": 0, + "slowest_section_time_ms": 97 + } +} diff --git a/data/output/00353931122eb56daffad4f1119749efcbe200d2.json b/data/output/00353931122eb56daffad4f1119749efcbe200d2.json new file mode 100644 index 0000000..2bc2615 --- /dev/null +++ b/data/output/00353931122eb56daffad4f1119749efcbe200d2.json @@ -0,0 +1,290 @@ +{ + "file_name": "00353931122eb56daffad4f1119749efcbe200d2.txt", + "total_words": 448, + "top_n_words": [ + { + "word": "a", + "count": 22 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "the", + "count": 15 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "police", + "count": 10 + }, + { + "word": "for", + "count": 8 + }, + { + "word": "swanston", + "count": 8 + }, + { + "word": "she", + "count": 7 + }, + { + "word": "at", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "weapon.", + "length": 7 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Mark Duell .", + "length": 12 + }, + { + "text": "09:32 EST, 14 May 2013 .", + "length": 24 + }, + { + "text": "11:47 EST, 14 May 2013 .", + "length": 24 + }, + { + "text": "She also denies disclosing confidential .", + "length": 41 + }, + { + "text": "Sorry we are unable to accept comments for legal reasons.", + "length": 57 + }, + { + "text": "management system other than for a policing purpose, passing .", + "length": 62 + }, + { + "text": "These conditions have been in place 'for some time', the court heard.", + "length": 69 + }, + { + "text": "advising him about police procedures and giving him two police 
shirts.", + "length": 70 + }, + { + "text": "police information to a Robert Morris, failing to report a confession, .", + "length": 72 + }, + { + "text": "allegations of misconduct include accessing intelligence on the records .", + "length": 73 + }, + { + "text": "his possession of class A drugs and his confession to an assault with a .", + "length": 73 + }, + { + "text": "confidential police information to a Miguel Sewell and failing to report .", + "length": 74 + }, + { + "text": "Dressed in a purple and black dress, she stood to hear the counts against her.", + "length": 78 + }, + { + "text": "Outside court: Swanston, of Portsmouth, allegedly failed to report a man for possessing class A drugs .", + "length": 103 + }, + { + "text": "Magistrates asked if she understood the allegations, from prosecutor Zoe Martin, and she replied 'yes'.", + "length": 103 + }, + { + "text": "A former woman police officer allegedly disclosed confidential intelligence and tactics, a court heard today.", + "length": 109 + }, + { + "text": "She denies wilfully neglecting to perform a duty as a holder of a public office from January 1 to October 18, 2012.", + "length": 115 + }, + { + "text": "She must sleep at her home address, not enter a given area and her passport must remain logged with Hampshire Police.", + "length": 117 + }, + { + "text": "Swanston, from Portsmouth, appeared at Basingstoke Magistrates’ Court this morning for a brief hearing to hear the charges against her.", + "length": 137 + }, + { + "text": "Officer: Rebecca Swanston (pictured today), 28, is alleged to have logged onto information systems and passed on confidential intelligence .", + "length": 140 + }, + { + "text": "Magistrates adjourned the case for trial and Swanston was bailed to appear at Winchester Crown Court for a plea and case management hearing on June 4.", + "length": 150 + }, + { + "text": "Swanston, who was based at Southampton Central Police Station, is alleged to have committed the offences over 
a 10-month period between January and October last year.", + "length": 166 + }, + { + "text": "Instead of upholding the law, she shared details with the intention of frustrating ongoing probes, detection of crime and the apprehension of offenders, prosecutors claim.", + "length": 171 + }, + { + "text": "Rebecca Swanston, 28, of Hampshire Police, is alleged to have logged onto information systems including the police records management system and passed on confidential intelligence.", + "length": 181 + }, + { + "text": "HQ: Swanston, who was based at Southampton Central Police Station (pictured), is alleged to have committed the offences over a 10-month period between January and October last year .", + "length": 182 + }, + { + "text": "Swanston, who today faced three charges of misconduct in public office, also allegedly failed to report a man for possessing class A drugs, plus confessions to assaults and other offences.", + "length": 188 + }, + { + "text": "The third count relates to accessing and leaking information and briefings about a suspect, Tariq Khan, to be passed onto him with the intention of frustrating an ongoing probe into serious offences.", + "length": 199 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.6473748087882996 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:50.283885239Z", + "first_section_created": "2025-12-23T09:33:50.284294254Z", + "last_section_published": "2025-12-23T09:33:50.284489862Z", + "all_results_received": "2025-12-23T09:33:50.350491744Z", + "output_generated": "2025-12-23T09:33:50.35063225Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:50.284294254Z", + "publish_time": "2025-12-23T09:33:50.284489862Z", + "first_worker_start": "2025-12-23T09:33:50.285099185Z", + "last_worker_end": 
"2025-12-23T09:33:50.349634Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:50.285033182Z", + "start_time": "2025-12-23T09:33:50.285114585Z", + "end_time": "2025-12-23T09:33:50.285183688Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:50.28525Z", + "start_time": "2025-12-23T09:33:50.285401Z", + "end_time": "2025-12-23T09:33:50.349634Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:50.285015082Z", + "start_time": "2025-12-23T09:33:50.285099185Z", + "end_time": "2025-12-23T09:33:50.285175788Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:50.285032182Z", + "start_time": "2025-12-23T09:33:50.285130486Z", + "end_time": "2025-12-23T09:33:50.285163287Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2863, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/00354310584353392b2e7329bab09a1f7201705c.json b/data/output/00354310584353392b2e7329bab09a1f7201705c.json new file mode 100644 index 0000000..f1beeed --- /dev/null +++ b/data/output/00354310584353392b2e7329bab09a1f7201705c.json @@ -0,0 +1,290 @@ +{ + "file_name": "00354310584353392b2e7329bab09a1f7201705c.txt", + "total_words": 638, + "top_n_words": [ + { + "word": "the", + "count": 30 + }, + { + "word": "to", + "count": 26 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "schemes", + "count": 14 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "are", + "count": 10 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "is", + "count": 9 + }, + { + "word": "tax", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Jason Groves .", + "length": 14 + }, + { + "text": "Between them they owe an estimated £4.", + "length": 39 + }, + { + "text": "What they are doing is unacceptable and wrong.", + "length": 46 + }, + { + "text": "‘They are taking a gamble on not being caught.", + "length": 48 + }, + { + "text": "Katie Melua is reportedly part of the Liberty scheme .", + "length": 54 + }, + { + "text": "Experts warn that this could force some into bankruptcy.", + "length": 56 + }, + { + "text": "HMRC will identify the schemes only by their serial numbers.", + "length": 60 + }, + { + "text": "Take That: Gary Barlow (left), Howard Donald, Jason Orange and Mark Owen.", + "length": 73 + }, + { + "text": "However, failure to make payments demanded by HMRC today could result in criminal charges.", + "length": 90 + }, + { + "text": "’ The 
Treasury declined to comment in detail on the individual schemes targeted in the clampdown.", + "length": 99 + }, + { + "text": "Barlow, Owen, Donald and their manager Jonathan Wild are reported to have invested some £26million in schemes .", + "length": 112 + }, + { + "text": "And instead of pursuing each case through the courts for years, HMRC may now issue demands for immediate payment.", + "length": 113 + }, + { + "text": "Members of the schemes will not face criminal charges, but letters will demand they make ‘accelerated payments’.", + "length": 116 + }, + { + "text": "9billion – equal to an average of about £114,000 each, although some are thought to owe millions of pounds in tax.", + "length": 117 + }, + { + "text": "The taxman believes more than 33,000 individuals and 10,000 companies are members of the schemes being published today.", + "length": 119 + }, + { + "text": "Accountancy firms are now required by law to reveal details of schemes that may be viewed as tax -avoidance arrangements.", + "length": 121 + }, + { + "text": "The scheme’s 1,600 members are also believed to include business tycoons, lawyers, doctors and other wealthy individuals.", + "length": 123 + }, + { + "text": "‘Most people entering these schemes know they are unfair and wrong – they enter these schemes with their eyes wide open.", + "length": 124 + }, + { + "text": "HM Revenue and Customs  is publishing a list of 1,200 avoidance schemes whose members will be told to pay up within 90 days.", + "length": 125 + }, + { + "text": "Tory MP Charlie Elphicke last night welcomed the move, saying the rich should be subject to the same tax laws as everyone else.", + "length": 127 + }, + { + "text": "He said members of the public would have little sympathy for wealthy individuals who had tried to avoid paying their fair share.", + "length": 128 + }, + { + "text": "Using the schemes is not illegal, and many of the members involved say they already pay large sums in tax and have done nothing 
wrong.", + "length": 134 + }, + { + "text": "David Elliott, a partner at accountants Moore Stephens, said today’s letters were likely to place some individuals in severe difficulty.", + "length": 138 + }, + { + "text": "‘It is quite right that the Revenue is taking the battle to people who are using their wealth to game our tax system,’ Mr Elphicke said.", + "length": 140 + }, + { + "text": "It was wound up in 2009 when a tax loophole was closed, but a test case relating to members’ tax avoidance is not due to be heard until next year.", + "length": 148 + }, + { + "text": "HMRC’s decision to publish the list of schemes follows an announcement in the Budget of new rules to make it easier for the taxman to close avoidance schemes.", + "length": 160 + }, + { + "text": "‘The behaviour of these people means that hard-working families who do the right thing and pay their taxes have to pay more to support the services we all use.", + "length": 161 + }, + { + "text": "‘Even a remote prospect of being made bankrupt could mean that taxpayers feel under pressure to settle disputed cases rather than take an appeal to the courts.", + "length": 161 + }, + { + "text": "Thousands of celebrities, sports stars and wealthy professionals will be warned today that they face massive bills following a clampdown on tax avoidance schemes.", + "length": 162 + }, + { + "text": "However, its list is understood to include the Liberty scheme, reportedly used by a string of high-profile figures including George Michael, Sir Michael Caine and Katie Melua.", + "length": 175 + }, + { + "text": "HMRC’s clampdown is also thought to cover the so-called Icebreaker schemes in which Take That’s Gary Barlow, Mark Owen, Howard Donald and manager Jonathan Wild are reported to have invested some £26million.", + "length": 211 + }, + { + "text": "He told Accountancy Age: ‘Receiving a demand to make up-front payment of tax could put some taxpayers under financial strain, and in the very worst cases, could even 
trigger personal bankruptcies or business insolvencies before the technical merits of the arrangement have been tested.", + "length": 287 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4893903136253357 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:50.7853308Z", + "first_section_created": "2025-12-23T09:33:50.787532183Z", + "last_section_published": "2025-12-23T09:33:50.787791693Z", + "all_results_received": "2025-12-23T09:33:50.850417949Z", + "output_generated": "2025-12-23T09:33:50.850606156Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:50.787532183Z", + "publish_time": "2025-12-23T09:33:50.787791693Z", + "first_worker_start": "2025-12-23T09:33:50.78825611Z", + "last_worker_end": "2025-12-23T09:33:50.849474Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:50.788271311Z", + "start_time": "2025-12-23T09:33:50.788349114Z", + "end_time": "2025-12-23T09:33:50.788422017Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:50.788473Z", + "start_time": "2025-12-23T09:33:50.788637Z", + "end_time": "2025-12-23T09:33:50.849474Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:50.788176107Z", + "start_time": "2025-12-23T09:33:50.78825611Z", + "end_time": "2025-12-23T09:33:50.788363614Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:50.788187408Z", + "start_time": "2025-12-23T09:33:50.78825721Z", + "end_time": "2025-12-23T09:33:50.788315913Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3818, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/003558aeb0a2aa238508d17bac8624e6388b96fd.json b/data/output/003558aeb0a2aa238508d17bac8624e6388b96fd.json new file mode 100644 index 0000000..82885a6 --- /dev/null +++ b/data/output/003558aeb0a2aa238508d17bac8624e6388b96fd.json @@ -0,0 +1,254 @@ +{ + "file_name": "003558aeb0a2aa238508d17bac8624e6388b96fd.txt", + "total_words": 345, + "top_n_words": [ + { + "word": "the", + "count": 17 + }, + { + "word": "her", + "count": 16 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "and", + "count": 14 + }, + { + "word": "is", + "count": 10 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "cat", + "count": 6 + }, + { + "word": "on", + "count": 6 + }, + { + "word": "penelope", + "count": 6 + }, + { + "word": "she", + 
"count": 6 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Jill Reilly .", + "length": 13 + }, + { + "text": "13:00 EST, 24 October 2012 .", + "length": 28 + }, + { + "text": "02:01 EST, 25 October 2012 .", + "length": 28 + }, + { + "text": "The man started off on his journey just .", + "length": 41 + }, + { + "text": "recently gone missing from her home in the area.", + "length": 48 + }, + { + "text": "The Scottish SPCA is now looking to reunite her with her owner.", + "length": 63 + }, + { + "text": "off London Road in Edinburgh and it is believed the cat may have .", + "length": 66 + }, + { + "text": "'She's doing well and has proven to be very friendly and good-natured.", + "length": 70 + }, + { + "text": "Homeless: The Scottish SPCA is now looking to reunite her with her owner .", + "length": 74 + }, + { + "text": "'Penelope is receiving antibiotics for her burns and has had her jaw wired.", + "length": 75 + }, + { + "text": "A cat survived a six-mile journey perched on the boiling metal of a van's engine.", + "length": 81 + }, + { + "text": "'Penelope's owner must be very worried and we'd love to return her home after her frightening ordeal.", + "length": 101 + }, + { + "text": "She said the charity hoped that due to her distinctive coat she would be recognised and claimed by her owners.", + "length": 110 + }, + { + "text": "Treatment: A cat who survived a six-mile journey sitting on a van's engine is being treated for burns and a fractured jaw .", + "length": 123 + }, + { + "text": "Steph Grant, animal rescue officer, said: 'Penelope has sustained burns to the pads on her feet and part of her coat is singed.", + "length": 127 + }, + { + "text": "'It could be that Penelope is a house cat and isn't used to being outdoors so she went inside the engine for warmth and shelter.", + 
"length": 128 + }, + { + "text": "The Persian cat, nicknamed Penelope, was discovered by the driver under his bonnet after noticing a burning smell yesterday morning.", + "length": 132 + }, + { + "text": "Penelope who is now being treated for burns and a fractured jaw, is thought to have fallen from a height before ending up in the van.", + "length": 133 + }, + { + "text": "Travels: The man started off on his journey just off London Road in Edinburgh and it is believed the cat may have recently gone missing from her home in the area .", + "length": 163 + }, + { + "text": "'She also has a fractured jaw and cut under her chin as well as a missing tooth, so we think she may have fallen from a height and hit her face on the ground shortly before climbing inside the engine.", + "length": 200 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4510943293571472 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:51.288502727Z", + "first_section_created": "2025-12-23T09:33:51.288835439Z", + "last_section_published": "2025-12-23T09:33:51.289060448Z", + "all_results_received": "2025-12-23T09:33:51.358883974Z", + "output_generated": "2025-12-23T09:33:51.359020879Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:51.288835439Z", + "publish_time": "2025-12-23T09:33:51.289060448Z", + "first_worker_start": "2025-12-23T09:33:51.28964977Z", + "last_worker_end": "2025-12-23T09:33:51.35782Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:51.289599268Z", + "start_time": "2025-12-23T09:33:51.28964977Z", + "end_time": "2025-12-23T09:33:51.289684171Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": 
"2025-12-23T09:33:51.289773Z", + "start_time": "2025-12-23T09:33:51.289912Z", + "end_time": "2025-12-23T09:33:51.35782Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:51.28965057Z", + "start_time": "2025-12-23T09:33:51.289702272Z", + "end_time": "2025-12-23T09:33:51.289754074Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:51.289625369Z", + "start_time": "2025-12-23T09:33:51.289682471Z", + "end_time": "2025-12-23T09:33:51.289700072Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1857, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/003574fd5a2152347de4f8c8d3512d7f73fe0326.json 
b/data/output/003574fd5a2152347de4f8c8d3512d7f73fe0326.json new file mode 100644 index 0000000..da9850a --- /dev/null +++ b/data/output/003574fd5a2152347de4f8c8d3512d7f73fe0326.json @@ -0,0 +1,298 @@ +{ + "file_name": "003574fd5a2152347de4f8c8d3512d7f73fe0326.txt", + "total_words": 762, + "top_n_words": [ + { + "word": "the", + "count": 55 + }, + { + "word": "a", + "count": 20 + }, + { + "word": "read", + "count": 20 + }, + { + "word": "message", + "count": 19 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "it", + "count": 15 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "been", + "count": 12 + }, + { + "word": "has", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "Now stalk with more vengeance.", + "length": 30 + }, + { + "text": "Apple's iMessages also use this feature.", + "length": 40 + }, + { + "text": "The app update is available across all mobile platforms.", + "length": 56 + }, + { + "text": "The app update is available across all mobile platforms.", + "length": 56 + }, + { + "text": "'For me, if I don’t feel like replying, I just don’t.", + "length": 57 + }, + { + "text": "’ It is not currently possible to opt out of this feature.", + "length": 60 + }, + { + "text": "Users need to update the app on their devices to see the changes.", + "length": 65 + }, + { + "text": "However, these changes have divided opinion, especially on Twitter.", + "length": 67 + }, + { + "text": "WhatsApp is owned by Facebook, which has a similar feature on its Messenger app.", + "length": 80 + }, + { + "text": "The two blue ticks only appear when all participants in the group have read the post.", + "length": 85 + }, + { + "text": "Now, when a message has been sent, delivered and read, the double grey tick turns blue.", + "length": 87 + }, + { + "text": "’ In a group chat, the blue ticks only appear when all participants have read the message.", + "length": 92 + }, + { + "text": "And clicking on the 
message will show you the precise time the recipient opened and read it.", + "length": 92 + }, + { + "text": "As @the_baba1 wrote: ‘So WhatsApp now shows blue ticks to indicate a message has been read.", + "length": 93 + }, + { + "text": "To see what time the messages were read users can now press and hold down on a particular post.", + "length": 95 + }, + { + "text": "To see what time the messages were read users can now press and hold down on a particular post.", + "length": 95 + }, + { + "text": "In a group chat, the blue ticks only appear when all participants in the group have read the message.", + "length": 101 + }, + { + "text": "In a group chat, the second ticks appear when all participants in the group have received the message.", + "length": 102 + }, + { + "text": "From today it will be harder to avoid WhatsApp messages, or at least claim that you haven’t seen them.", + "length": 104 + }, + { + "text": "Some users are concerned the update makes it harder for them to ignore others, and makes it easier for people to keep tabs on them.", + "length": 131 + }, + { + "text": "To see what time the messages were read, users can now press and hold down on a particular post and select the Message Info option.", + "length": 131 + }, + { + "text": "In an official blog post, WhatsApp tried to reassure users who may be concerned about why a person hadn't received or read their messages.", + "length": 138 + }, + { + "text": "Selecting the Message Info option reveals the time the message was read, and in the case of group messages, what time each member read it.", + "length": 138 + }, + { + "text": "WhatsApp has rolled out the changes to all variations of its app, across iOS, Android, Windows Phone, Nokia S40, Nokia S60 and BlackBerry 10.", + "length": 141 + }, + { + "text": "Selecting the Message Info option then reveals the time the message was read, and in the case of group messages, what time each member read it.", + "length": 143 + }, + { + "text": "When a message 
is opened and read, a tick appears in the message list and the time it was read appears in the bottom right-hand corner of the post.", + "length": 147 + }, + { + "text": "‘If you only see a single check mark next to your message, there is likely nothing wrong with WhatsApp or your phone,’ explained the developers.", + "length": 148 + }, + { + "text": "From today the Facebook-owned app will display two blue ticks in the bottom right-hand corner of an individual message to show that it has been read.", + "length": 149 + }, + { + "text": "From now, a single grey tick means the message has been sent, a double grey tick means it has been delivered, and two blue ticks mean it has been read.", + "length": 151 + }, + { + "text": "’ But not all Twitter users were concerned about the update - @tnicole91 explained: ‘I don’t get why some people are stressed about this new WhatsApp blue tick.", + "length": 166 + }, + { + "text": "’ Twitter user @carpocarp said: ‘The WhatsApp blue tick is going to ruin friendships,’ while @archielbar added: ‘Those blue ticks will be the death of some of us!", + "length": 170 + }, + { + "text": "Before the update, there were only two types of tick - a single grey tick meant the message had been successfully sent, and a double grey tick meant it had been delivered.", + "length": 171 + }, + { + "text": "In the case of group messages, this reveals what time each individual member read it (pictured) WhatsApp explained the changes and revealed what the new check marks look like on an official blog post .", + "length": 201 + }, + { + "text": "From now, a single grey tick means the message has been sent, a double grey tick means it has been delivered, and two blue ticks mean it has been read (pictured) More information about the features can be found on WhatsApp's FAQ page.", + "length": 234 + }, + { + "text": "‘There may be several reasons why your message has been sent, but not delivered to your chat partner such as their phone might be off, they 
could be sleeping, especially if they live in a different time zone, they might be experiencing network connection issues or they might have seen the notification on their screen, but did not launch the app - especially common if the recipient uses an iPhone.", + "length": 401 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.6117870211601257 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:51.789538272Z", + "first_section_created": "2025-12-23T09:33:51.789974089Z", + "last_section_published": "2025-12-23T09:33:51.790419206Z", + "all_results_received": "2025-12-23T09:33:51.853184866Z", + "output_generated": "2025-12-23T09:33:51.853325772Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:51.789974089Z", + "publish_time": "2025-12-23T09:33:51.790419206Z", + "first_worker_start": "2025-12-23T09:33:51.790752818Z", + "last_worker_end": "2025-12-23T09:33:51.852287Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:51.79107573Z", + "start_time": "2025-12-23T09:33:51.791153133Z", + "end_time": "2025-12-23T09:33:51.791220436Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:51.79121Z", + "start_time": "2025-12-23T09:33:51.791349Z", + "end_time": "2025-12-23T09:33:51.852287Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:51.791015828Z", + "start_time": "2025-12-23T09:33:51.791088931Z", + "end_time": "2025-12-23T09:33:51.791191635Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": 
"2025-12-23T09:33:51.790682215Z", + "start_time": "2025-12-23T09:33:51.790752818Z", + "end_time": "2025-12-23T09:33:51.790788519Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4247, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/00359d77485ec3493d33d0b4f302cc1c13800445.json b/data/output/00359d77485ec3493d33d0b4f302cc1c13800445.json new file mode 100644 index 0000000..143bf86 --- /dev/null +++ b/data/output/00359d77485ec3493d33d0b4f302cc1c13800445.json @@ -0,0 +1,370 @@ +{ + "file_name": "00359d77485ec3493d33d0b4f302cc1c13800445.txt", + "total_words": 687, + "top_n_words": [ + { + "word": "a", + "count": 32 + }, + { + "word": "the", + "count": 29 + }, + { + "word": "in", + "count": 25 + }, + { + "word": "of", + "count": 23 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "gomez", + 
"count": 13 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "drug", + "count": 12 + }, + { + "word": "he", + "count": 12 + }, + { + "word": "was", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "drug boss.", + "length": 10 + }, + { + "text": "Local media .", + "length": 13 + }, + { + "text": "side of a gang.", + "length": 15 + }, + { + "text": "drug-related violence.", + "length": 22 + }, + { + "text": "baited the government.", + "length": 22 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "broad sweep of businesses.", + "length": 26 + }, + { + "text": "He is also wanted by the U.", + "length": 27 + }, + { + "text": "They also controlled politicians and .", + "length": 38 + }, + { + "text": "In 2013 it held much of the impoverished, .", + "length": 43 + }, + { + "text": "businesses including the export of iron ore.", + "length": 44 + }, + { + "text": "Both Guzman and Gomez were feared drug lords.", + "length": 45 + }, + { + "text": "before later joining La Familia, authorities said.", + "length": 50 + }, + { + "text": "arrested a handful of people connected to La Tuta.", + "length": 50 + }, + { + "text": "split within drug cartel La Familia Michoacana, a .", + "length": 51 + }, + { + "text": "Last week, police seized many properties there and .", + "length": 52 + }, + { + "text": "government for methamphetamine and cocaine trafficking.", + "length": 55 + }, + { + "text": "Police also arrested a number of other people with him.", + "length": 55 + }, + { + "text": "while defending his Knights Templar as a 'necessary evil.", + "length": 57 + }, + { + "text": "diversified from drug trafficking into a myriad of other .", + "length": 58 + }, + { + "text": "' 'Our only function is to help the people, preserve our .", + "length": 58 + }, + { + "text": "Gomez insisted the cartel followed a strict ethical code, .", + "length": 59 + }, + { + "text": "Sporting a goatee beard and often 
wearing a baseball cap, .", + "length": 59 + }, + { + "text": "Gomez became the front man of the Knights Templar after a .", + "length": 59 + }, + { + "text": "Since the Mexican government began a military crackdown in .", + "length": 60 + }, + { + "text": "pseudo-religious gang in which he was also a leading figure.", + "length": 60 + }, + { + "text": "Whether railing against political corruption on YouTube, or .", + "length": 61 + }, + { + "text": "images of Ernesto 'Che' Guevara and other revolutionary icons.", + "length": 62 + }, + { + "text": "though as time passed he became more open about the criminal .", + "length": 62 + }, + { + "text": "Gomez began smuggling marijuana independently in 2001 or 2002, .", + "length": 64 + }, + { + "text": "giving interviews in hideouts to the media, Gomez relentlessly .", + "length": 64 + }, + { + "text": "He continuously accused officials of colluding with rival gangs .", + "length": 65 + }, + { + "text": "reported that an earlier operation had led to the arrest of the .", + "length": 65 + }, + { + "text": "2007 on drug gangs, more than 100,000 people have been killed in .", + "length": 66 + }, + { + "text": "mountainous terrain of Michoacan in a firm grip and often extorted a .", + "length": 70 + }, + { + "text": "One of Mexico's most wanted drug lords, 'La Tuta', has been captured.", + "length": 70 + }, + { + "text": "But he became the prime target of Pena Nieto's drive to regain control of Michoacan.", + "length": 84 + }, + { + "text": "However the man caught up in the country's latest sting, Gomez, often sought the limelight.", + "length": 91 + }, + { + "text": "A Mexican police spokesman said: 'He will be brought to Mexico City in the coming hours to make a declaration.", + "length": 110 + }, + { + "text": "Servando 'LaTuta' Gomez, 49, was caught on Friday in Morelia, Michoacan's state capital after months of intelligence work .", + "length": 123 + }, + { + "text": "state, and preserve our country from people causing 
terror,' Gomez said in a video posted online in 2012, sitting in front of .", + "length": 127 + }, + { + "text": "Gomez, is implicated in the 2009 murder of 12 Mexican federal police officers, according to the United States Justice Department.", + "length": 129 + }, + { + "text": "Last year, in another coup, Cartel boss Joaquin 'Shorty' Guzman, head of the Sinaloa Cartel, a powerful drug smuggling gang was captured.", + "length": 137 + }, + { + "text": "Servando Gomez, 49, was caught on Friday in Morelia, Michoacan's state capital after months of intelligence work by Mexican security forces.", + "length": 141 + }, + { + "text": "He was cornered in a house in a peaceful showdown where no shots were fired - making him the second drug kingpin to fall in just over a year.", + "length": 142 + }, + { + "text": "The area is a violent western state wracked by clashes between Gomez's Knights Templar cartel and heavily-armed vigilantes trying to oust them.", + "length": 143 + }, + { + "text": "He is battling against lingering public anger over the September abduction and apparent massacre of 43 trainee teachers by corrupt police in league with gang members.", + "length": 166 + }, + { + "text": "Gomez, who had a $2 million bounty on his head before his arrest, was known as La Tuta' because he was a former teacher and left his profession to become a drug boss.", + "length": 166 + }, + { + "text": "He was cooking breakfast along with his beauty queen wife when police smashed down the door of the condo where they were hiding in Mazatlan, a beach resort town on Mexico's Pacific Coast.", + "length": 187 + }, + { + "text": "' His arrest marks a victory for Mexican President Enrique Pena Nieto as he grapples with grisly gang violence and attempts to quell outrage over violence, impunity and corruption in Mexico.", + "length": 190 + }, + { + "text": "Drug lord: Gomez, pictured, became the front man of the Knights Templar after a split within drug cartel La Familia Michoacana, a 
pseudo-religious gang in which he was also a leading figure .", + "length": 191 + }, + { + "text": "Powerful: Joaquin 'El Chapo' Guzman is escorted to a helicopter in handcuffs by Mexican navy marines at a navy hanger in Mexico City, Mexico, after finally being captured following years on the run .", + "length": 199 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5819928646087646 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:52.289427466Z", + "first_section_created": "2025-12-23T09:33:52.289752679Z", + "last_section_published": "2025-12-23T09:33:52.290017689Z", + "all_results_received": "2025-12-23T09:33:52.357605931Z", + "output_generated": "2025-12-23T09:33:52.357782837Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:52.289752679Z", + "publish_time": "2025-12-23T09:33:52.290017689Z", + "first_worker_start": "2025-12-23T09:33:52.290528008Z", + "last_worker_end": "2025-12-23T09:33:52.356665Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:52.290563609Z", + "start_time": "2025-12-23T09:33:52.290644512Z", + "end_time": "2025-12-23T09:33:52.290756416Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:52.290815Z", + "start_time": "2025-12-23T09:33:52.290959Z", + "end_time": "2025-12-23T09:33:52.356665Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:52.290515307Z", + "start_time": "2025-12-23T09:33:52.29058691Z", + "end_time": "2025-12-23T09:33:52.290734216Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + 
"receive_time": "2025-12-23T09:33:52.290454005Z", + "start_time": "2025-12-23T09:33:52.290528008Z", + "end_time": "2025-12-23T09:33:52.290569109Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4138, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/00359f516cdf8b1800c7102711bd9aa400d1c749.json b/data/output/00359f516cdf8b1800c7102711bd9aa400d1c749.json new file mode 100644 index 0000000..47d2898 --- /dev/null +++ b/data/output/00359f516cdf8b1800c7102711bd9aa400d1c749.json @@ -0,0 +1,416 @@ +{ + "file_name": "00359f516cdf8b1800c7102711bd9aa400d1c749.txt", + "total_words": 919, + "top_n_words": [ + { + "word": "the", + "count": 40 + }, + { + "word": "to", + "count": 33 + }, + { + "word": "of", + "count": 27 + }, + { + "word": "he", + "count": 21 + }, + { + "word": "and", + "count": 19 + }, + { + 
"word": "a", + "count": 17 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "was", + "count": 13 + }, + { + "word": "ice", + "count": 11 + }, + { + "word": "s", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "Dr.", + "length": 3 + }, + { + "text": "...", + "length": 3 + }, + { + "text": "through my clothes.", + "length": 19 + }, + { + "text": "\" E-mail to a friend .", + "length": 22 + }, + { + "text": "But here, it's happening on U.", + "length": 30 + }, + { + "text": "\"What they did to me was very, very bad.", + "length": 40 + }, + { + "text": "See Diouf's stay of deportation document .", + "length": 42 + }, + { + "text": "Soeoth says he's traumatized by what happened.", + "length": 46 + }, + { + "text": "Watch why the former detainees claim abuse » .", + "length": 47 + }, + { + "text": "See the document that shows Soeoth was injected .", + "length": 49 + }, + { + "text": "soil to an immigrant the government is trying to deport.", + "length": 56 + }, + { + "text": "Public Health Service, may prescribe or administer medication.", + "length": 62 + }, + { + "text": "\"I know this country [is] very generous to immigrants,\" he says.", + "length": 64 + }, + { + "text": "They are seeking an end to the alleged practice and unspecified damages.", + "length": 72 + }, + { + "text": "CNN's Wayne Drash, Traci Tamura and Gregg Cane contributed to this report.", + "length": 74 + }, + { + "text": "But both Soeoth and Diouf say they had not exhibited any combative behavior.", + "length": 76 + }, + { + "text": "\" A government report says he was medicated because he did not follow orders.", + "length": 77 + }, + { + "text": "\"Only trained and qualified medical professionals, including officers of the U.", + "length": 79 + }, + { + "text": "\" But, Diouf says, he was injected on the plane right before he was to be deported.", + "length": 83 
+ }, + { + "text": "\" The allegations of ICE forcibly drugging deportees were raised last month by Sen.", + "length": 83 + }, + { + "text": "If they lose, they may land back in the hands of ICE, once again facing deportation.", + "length": 84 + }, + { + "text": "Joe Lieberman, I-Connecticut, during the re-nomination hearing of ICE chief Julie Myers.", + "length": 88 + }, + { + "text": "In both cases, Diouf and Soeoth remain in the United States pending a decision in the case.", + "length": 91 + }, + { + "text": "Doctors say they are required to see patients in person before such drugs are administered.", + "length": 91 + }, + { + "text": "\"That is the sort of thing that would be subject to a malpractice claim in the civilian world.", + "length": 94 + }, + { + "text": "He said he even had a federal stay of his deportation -- and the paperwork to prove it -- but his U.", + "length": 100 + }, + { + "text": "ICE said in a written statement it couldn't respond to specific allegations due to pending litigation.", + "length": 102 + }, + { + "text": "Raymond Soeoth, pictured here with his wife, says he was injected with drugs by ICE agents against his will.", + "length": 108 + }, + { + "text": "government escorts wouldn't let him show it to the pilot of the plane preparing to fly him out of the country.", + "length": 110 + }, + { + "text": "She added no detainee should be \"involuntarily medicated without court order,\" except in emergency situations.", + "length": 110 + }, + { + "text": "Both are plaintiffs in a class-action lawsuit brought by the American Civil Liberties Union against the government.", + "length": 115 + }, + { + "text": "\"They pushed me on the bench, they opened my pants, and they just give me injection,\" he said through broken English.", + "length": 117 + }, + { + "text": "From October last year to the end of April this year, she said 56 received psychotropic medications during the removal process.", + "length": 127 + }, + { + "text": "\"Senator 
Lieberman intends to follow up with ICE to ensure that detainees are not drugged unless there is a medical reason to do so.", + "length": 132 + }, + { + "text": "Terminal Island, once a federal prison, is a crowded facility along the ocean where hundreds of illegal immigrants await deportation.", + "length": 133 + }, + { + "text": "\" Responding to Lieberman's written questions, Myers said 1,073 immigration detainees had \"medical escorts\" for deportation since 2003.", + "length": 135 + }, + { + "text": "Of those, 33 detainees received medication \"because of combative behavior with the imminent risk of danger to others and/or self,\" she said.", + "length": 140 + }, + { + "text": "One of the drugs in question is the potent anti-psychotic drug Haldol, which is often used to treat schizophrenia or other mental illnesses.", + "length": 140 + }, + { + "text": "\"First, I am aware of, and deeply concerned about reports that past practices may not have conformed to ICE detention standards,\" Myers said.", + "length": 141 + }, + { + "text": "Soeoth, a Christian minister from Indonesia, spent 27 months in detention awaiting deportation after his bid for political asylum was rejected.", + "length": 143 + }, + { + "text": "Hours before he was to be sent back home on December 7, 2004, he says guards injected him with a mystery drug that made him groggy for two days.", + "length": 144 + }, + { + "text": "Soeoth's medical records indicate he was injected with Cogentin and Haldol, even though those same records show he has no history of mental illness.", + "length": 148 + }, + { + "text": "Two immigrants, Raymond Soeoth of Indonesia and Amadou Diouf of Senegal in West Africa, told CNN they were injected with the drugs against their will.", + "length": 150 + }, + { + "text": "\"Department of Homeland Security law enforcement personnel may not and do not prescribe or administer medication to detainees,\" the ICE statement said.", + "length": 151 + }, + { + "text": "In the 
records, the government says he was injected with the drug after he said he would kill himself if deported -- a remark Soeoth denies ever making.", + "length": 152 + }, + { + "text": "Paul Appelbaum, a professor of psychiatry, law and ethics at Columbia University, reviewed both men's medical records for this report and was stunned by what he discovered.", + "length": 172 + }, + { + "text": "\" ACLU attorney Ahilan Arulanantham, who is representing Soeoth and Diouf, said, \"It would be torture to give a powerful anti-psychotic drug to somebody who isn't even mentally ill.", + "length": 181 + }, + { + "text": "\"I'm really shocked to find out that the government has been using physicians and using potent medications in this way,\" said Appelbaum, who also serves as a member of the American Academy of Psychiatry and the Law.", + "length": 215 + }, + { + "text": "He says he was taken to Los Angeles International Airport while in this drug-induced stupor, but two hours before takeoff, airline security refused to transport him, so ICE agents returned him to his cell at Terminal Island near Los Angeles.", + "length": 241 + }, + { + "text": "LOS ANGELES, California (CNN) -- Former detainees of Immigration and Customs Enforcement accuse the agency in a lawsuit of forcibly injecting them with psychotropic drugs while trying to shuttle them out of the country during their deportation.", + "length": 244 + }, + { + "text": "\"The information the committee has received from ICE regarding the forced drugging of immigration detainees is extremely troubling, particularly since it appears ICE may have violated its own detention standards,\" Lieberman spokeswoman Leslie Phillips told CNN in an e-mail.", + "length": 274 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7786752581596375 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:52.790823726Z", + "first_section_created": "2025-12-23T09:33:52.791658657Z", + 
"last_section_published": "2025-12-23T09:33:52.792021771Z", + "all_results_received": "2025-12-23T09:33:52.900977769Z", + "output_generated": "2025-12-23T09:33:52.901235979Z", + "total_processing_time_ms": 110, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 108, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:52.791658657Z", + "publish_time": "2025-12-23T09:33:52.791929067Z", + "first_worker_start": "2025-12-23T09:33:52.79253079Z", + "last_worker_end": "2025-12-23T09:33:52.872885Z", + "total_journey_time_ms": 81, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:52.792606993Z", + "start_time": "2025-12-23T09:33:52.792689896Z", + "end_time": "2025-12-23T09:33:52.7927906Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:52.793016Z", + "start_time": "2025-12-23T09:33:52.793281Z", + "end_time": "2025-12-23T09:33:52.872885Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 79 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:52.792463587Z", + "start_time": "2025-12-23T09:33:52.79253079Z", + "end_time": "2025-12-23T09:33:52.792635394Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:52.792567291Z", + "start_time": "2025-12-23T09:33:52.792650395Z", + "end_time": "2025-12-23T09:33:52.792725497Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:52.791965069Z", + "publish_time": "2025-12-23T09:33:52.792021771Z", + "first_worker_start": "2025-12-23T09:33:52.792636694Z", + "last_worker_end": "2025-12-23T09:33:52.900025Z", + "total_journey_time_ms": 108, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + 
"receive_time": "2025-12-23T09:33:52.792651295Z", + "start_time": "2025-12-23T09:33:52.792694396Z", + "end_time": "2025-12-23T09:33:52.792710397Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:52.792898Z", + "start_time": "2025-12-23T09:33:52.793048Z", + "end_time": "2025-12-23T09:33:52.900025Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 106 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:52.792611293Z", + "start_time": "2025-12-23T09:33:52.792636694Z", + "end_time": "2025-12-23T09:33:52.792652695Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:52.792567291Z", + "start_time": "2025-12-23T09:33:52.792645794Z", + "end_time": "2025-12-23T09:33:52.792652895Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 185, + "min_processing_ms": 79, + "max_processing_ms": 106, + "avg_processing_ms": 92, + "median_processing_ms": 106, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2758, + "slowest_section_id": 1, + "slowest_section_time_ms": 108 + } +} diff --git a/data/output/0035fb674f89599a34007e64d88886d6af5e12a2.json b/data/output/0035fb674f89599a34007e64d88886d6af5e12a2.json new file mode 100644 index 0000000..9d2f1b0 --- /dev/null +++ b/data/output/0035fb674f89599a34007e64d88886d6af5e12a2.json @@ -0,0 +1,442 @@ +{ + "file_name": "0035fb674f89599a34007e64d88886d6af5e12a2.txt", + "total_words": 723, + "top_n_words": [ + { + "word": "the", + "count": 41 + }, + { + "word": "he", + "count": 25 + }, + { + "word": "to", + "count": 22 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "his", + "count": 19 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "leeds", + "count": 16 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "was", + "count": 14 + }, + { + "word": "said", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "'He .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "Supt .", + "length": 6 + }, + { + "text": "hands.", + "length": 6 + }, + { + "text": "Monday.", + "length": 7 + }, + { + "text": "Leeds .", + "length": 7 + }, + { + "text": "Robert .", + "length": 8 + }, + { + "text": "Leeds, .", + "length": 8 + }, + { + "text": "complain.", + "length": 9 + }, + { + "text": "the face.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "endangered.", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "bank account.", + "length": 13 + }, + { + "text": "30am on Monday.", + "length": 15 + }, + { + "text": "Mr Warner said: .", + "length": 17 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "09:49 
EST, 25 May 2013 .", + "length": 24 + }, + { + "text": "05:28 EST, 25 May 2013 .", + "length": 24 + }, + { + "text": "' Married Leeds returned at 9.", + "length": 30 + }, + { + "text": "Leeds went to the job centre, Kiln .", + "length": 36 + }, + { + "text": "' Norwich job centre refused to comment.", + "length": 40 + }, + { + "text": "Jailed: Leeds was sent down for 15 months .", + "length": 43 + }, + { + "text": "Impact: The Rover drives through the entrance.", + "length": 46 + }, + { + "text": "crimes to feed his drugs habit, the court heard.", + "length": 48 + }, + { + "text": "Mr Clare said Leeds has a wife and daughter aged nine.", + "length": 54 + }, + { + "text": "don't get my money I will drive my car into your doors\".", + "length": 56 + }, + { + "text": "Big clean-up: The smashed doors of the Norwich job centre .", + "length": 59 + }, + { + "text": "Rage: Leeds is just a second away from crashing into the doors.", + "length": 63 + }, + { + "text": "He told police he was angry and did not regret what he had done.", + "length": 64 + }, + { + "text": "continued to drive his car down the stairs at the front of the .", + "length": 64 + }, + { + "text": "speak to the manager but was told he needed to use the phone to .", + "length": 65 + }, + { + "text": "of Norwich, was jailed for 15 months after he admitted dangerous .", + "length": 66 + }, + { + "text": "Warner, prosecuting, said the incident happened three days after .", + "length": 66 + }, + { + "text": "driving and criminal damage being reckless as to whether life was .", + "length": 67 + }, + { + "text": "Dave Marshall, of Norfolk Police, said last night: 'His actions put .", + "length": 69 + }, + { + "text": "said he wanted to hurt the jobcentre and said if he did not have his .", + "length": 70 + }, + { + "text": "'However, he became aggressive and irate and was heard to say, \"If I .", + "length": 70 + }, + { + "text": "catalogue of crime included a wounding with intent to cause grievous .", + 
"length": 70 + }, + { + "text": "benefit money he may as well be locked up as he would commit further .", + "length": 70 + }, + { + "text": "jobcentre through the glass front door right through into the entrance.", + "length": 71 + }, + { + "text": "He was furious after his dole money wasn't paid into his bank account .", + "length": 71 + }, + { + "text": "University Hospital where he used a multi-tool to slash his victim in .", + "length": 71 + }, + { + "text": "seething serial offender, convicted of 83 different offences, tried to .", + "length": 72 + }, + { + "text": "Mr Warner said he had been convicted 18 times for 83 different offences.", + "length": 72 + }, + { + "text": "as a warning to other people considering taking matters into their own .", + "length": 72 + }, + { + "text": "jobless Leeds discovered his unemployment benefit had not gone into his .", + "length": 73 + }, + { + "text": "House in Pottergate, to make a complaint about his benefits but when he .", + "length": 73 + }, + { + "text": "members of the public and workers at risk and I hope this sentence acts .", + "length": 73 + }, + { + "text": "turned up late on Friday afternoon he was asked to return the following .", + "length": 73 + }, + { + "text": "bodily harm charge in July 2004 after an incident at Norfolk and Norwich .", + "length": 74 + }, + { + "text": "Carnage: Leeds' Rover after the crash, as police being their investigations .", + "length": 77 + }, + { + "text": "CCTV footage of the drama on March 25 was played at Norwich Crown Court on Friday.", + "length": 82 + }, + { + "text": "Leeds, a drug addict and career criminal, had threatened the attack earlier in the day .", + "length": 88 + }, + { + "text": "Mr Clare said Leeds 'faces a number of difficulties in life, not least his heroin addiction'.", + "length": 93 + }, + { + "text": "' Leeds, who stopped just 4ft from the reception desk, was bundled out of his car by security staff.", + "length": 100 + }, + { + "text": "Narrow escape: 
The car ended up just 4ft from the reception and came inches from hitting people inside .", + "length": 104 + }, + { + "text": "Recorder Christopher Morgan accepted Leeds' frustration spilled over - but said he was 'clearly reckless'.", + "length": 106 + }, + { + "text": "As well as being jailed he was banned from driving for a year and ordered to pay a £100 victim surcharge.", + "length": 106 + }, + { + "text": "Michael Clare, defending, said the latest offences were 'out of all proportion to the frustration he felt'.", + "length": 107 + }, + { + "text": "Ready to strike: Bystanders in the job centre look on as Leeds gets ready to ram his Rover into the front entrance .", + "length": 116 + }, + { + "text": "He said it was an understatement to say he was frustrated and found the 'morass of bureaucracy' he faced 'exasperating'.", + "length": 120 + }, + { + "text": "'He returned to his vehicle and then drove out of the car park across Pottergate, narrowly missing passers-by,' said Mr Warner.", + "length": 127 + }, + { + "text": "This is the shocking moment a career criminal drives his car into a job centre in a fit of rage after not receiving his dole money.", + "length": 131 + }, + { + "text": "Impact: Christopher Leeds' drives his Rover through front entrance of a job centre in Norwich in a fit of rage after he wasn't paid his benefits .", + "length": 146 + }, + { + "text": "Christopher Leeds, 35, ploughed his Rover straight through the front of the packed building in Norwich  - stopping just 4ft short of the reception desk.", + "length": 153 + }, + { + "text": "The father narrowly avoided passers-by as he sped out of a city centre car park over the road, roared down a set of steps and crashed into the Jobcentre Plus.", + "length": 158 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6411428451538086 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:53.292513096Z", + "first_section_created": 
"2025-12-23T09:33:53.29288471Z", + "last_section_published": "2025-12-23T09:33:53.293114219Z", + "all_results_received": "2025-12-23T09:33:53.36386468Z", + "output_generated": "2025-12-23T09:33:53.364069388Z", + "total_processing_time_ms": 71, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 70, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:53.29288471Z", + "publish_time": "2025-12-23T09:33:53.293114219Z", + "first_worker_start": "2025-12-23T09:33:53.293776144Z", + "last_worker_end": "2025-12-23T09:33:53.362949Z", + "total_journey_time_ms": 70, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:53.293781944Z", + "start_time": "2025-12-23T09:33:53.293886948Z", + "end_time": "2025-12-23T09:33:53.293999752Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:53.294009Z", + "start_time": "2025-12-23T09:33:53.294144Z", + "end_time": "2025-12-23T09:33:53.362949Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 68 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:53.293705341Z", + "start_time": "2025-12-23T09:33:53.293776144Z", + "end_time": "2025-12-23T09:33:53.293858647Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:53.293740242Z", + "start_time": "2025-12-23T09:33:53.293816945Z", + "end_time": "2025-12-23T09:33:53.293850247Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + 
"worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 68, + "min_processing_ms": 68, + "max_processing_ms": 68, + "avg_processing_ms": 68, + "median_processing_ms": 68, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4143, + "slowest_section_id": 0, + "slowest_section_time_ms": 70 + } +} diff --git a/data/output/0036c48d80c270465bffced3e233fe39e5950431.json b/data/output/0036c48d80c270465bffced3e233fe39e5950431.json new file mode 100644 index 0000000..394e993 --- /dev/null +++ b/data/output/0036c48d80c270465bffced3e233fe39e5950431.json @@ -0,0 +1,222 @@ +{ + "file_name": "0036c48d80c270465bffced3e233fe39e5950431.txt", + "total_words": 284, + "top_n_words": [ + { + "word": "the", + "count": 33 + }, + { + "word": "titanic", + "count": 10 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "in", + "count": 7 + }, + { + "word": "at", + "count": 5 + }, + { + "word": "cruise", + "count": 5 + }, + { + "word": "a", + "count": 4 + }, + { + "word": "april", + "count": 4 + }, + { + "word": "as", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "M.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "Prices for the trip start at $3,900.", + "length": 36 + }, + { + "text": "A list of first class passengers for the R.", + "length": 43 + }, + { + "text": "Dean's brother and mother also survived the sinking.", + 
"length": 52 + }, + { + "text": "The trip will end in New York, where the Titanic was headed.", + "length": 60 + }, + { + "text": "Titanic is one of the artifacts that remains after the sinking.", + "length": 63 + }, + { + "text": "The ship sank less than three hours later, killing more than 1,500 people.", + "length": 74 + }, + { + "text": "Millvina Dean, thought to be the last survivor of the Titanic, died in June 2009 at age 97, according to friends.", + "length": 113 + }, + { + "text": "Passengers on the 2012 cruise will take part in a memorial service at the site, according to organizer Miles Morgan Travel.", + "length": 123 + }, + { + "text": "The Titanic Memorial Cruise is to set sail in April 2012, departing from Southampton, England, on April 8, just as the Titanic did.", + "length": 131 + }, + { + "text": "On April 15, the ship -- the Balmoral -- will arrive at the spot in the North Atlantic where the Titanic sank after it collided with an iceberg.", + "length": 144 + }, + { + "text": "Artifacts from the Titanic and a piece of the ship's hull have been recovered, but most of the wreckage remains where the luxury cruise liner sank.", + "length": 147 + }, + { + "text": "The 12-night memorial cruise will then take passengers to Halifax in Nova Scotia, Canada, so they can visit cemeteries where some of the Titanic victims are buried.", + "length": 164 + }, + { + "text": "(CNN) -- A memorial cruise is scheduled to set sail 100 years after the sinking of the Titanic, following the same trans-Atlantic route as the ill-fated ship, according to organizers.", + "length": 183 + }, + { + "text": "Dean was an infant when the Titanic -- publicized as \"practically unsinkable\" and as the largest passenger steamship at the time -- struck an iceberg on the night of April 14, 1912, during its maiden voyage from Southampton in southern England to New York.", + "length": 256 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5051975846290588 + }, + 
"name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:53.793926156Z", + "first_section_created": "2025-12-23T09:33:53.795848129Z", + "last_section_published": "2025-12-23T09:33:53.796020935Z", + "all_results_received": "2025-12-23T09:33:53.860190849Z", + "output_generated": "2025-12-23T09:33:53.860309253Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:53.795848129Z", + "publish_time": "2025-12-23T09:33:53.796020935Z", + "first_worker_start": "2025-12-23T09:33:53.796618658Z", + "last_worker_end": "2025-12-23T09:33:53.859302Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:53.796575256Z", + "start_time": "2025-12-23T09:33:53.796657159Z", + "end_time": "2025-12-23T09:33:53.79669306Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:53.79676Z", + "start_time": "2025-12-23T09:33:53.796891Z", + "end_time": "2025-12-23T09:33:53.859302Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:53.796551355Z", + "start_time": "2025-12-23T09:33:53.796618658Z", + "end_time": "2025-12-23T09:33:53.79668006Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:53.796570856Z", + "start_time": "2025-12-23T09:33:53.796649159Z", + "end_time": "2025-12-23T09:33:53.79667026Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1606, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/0036d1138155e2644813f9c218c743e982e27a27.json b/data/output/0036d1138155e2644813f9c218c743e982e27a27.json new file mode 100644 index 0000000..644741e --- /dev/null +++ b/data/output/0036d1138155e2644813f9c218c743e982e27a27.json @@ -0,0 +1,222 @@ +{ + "file_name": "0036d1138155e2644813f9c218c743e982e27a27.txt", + "total_words": 283, + "top_n_words": [ + { + "word": "the", + "count": 23 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "in", + "count": 7 + }, + { + "word": "and", + "count": 6 + }, + { + "word": "of", + "count": 5 + }, + { + "word": "on", + "count": 5 + }, + { + "word": "s", + "count": 5 + }, + { + "word": "bang", + "count": 4 + }, + { + "word": "big", + "count": 4 + }, + { + "word": "for", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "Warner Bros.", + "length": 12 + }, + { + "text": "\" Warner Bros.", + "length": 14 + }, + { + "text": "is owned by CNN's parent company.", + "length": 33 + }, + { + "text": 
"The popular series was renewed in March.", + "length": 40 + }, + { + "text": "'The Big Bang Theory' actors get $25,000 an hour .", + "length": 50 + }, + { + "text": "\"Big Bang Theory\" fans can breathe a sigh of relief.", + "length": 52 + }, + { + "text": "Television told CNN that \"we aren't commenting on the speculation at this time.", + "length": 79 + }, + { + "text": "It's been said that the trio previously received more than $300,000 per episode.", + "length": 80 + }, + { + "text": "\" According to reports, the trio will also have an increased stake in the show's backend profits.", + "length": 97 + }, + { + "text": "According to Deadline, the stars of the hit CBS series have agreed to hefty pay increases for the new season.", + "length": 109 + }, + { + "text": "Mayim Bialik and Melissa Rauch, who also appear on the show, are reported to have received raises in September.", + "length": 111 + }, + { + "text": "\" Earlier, the company confirmed that \"ongoing contract negotiations\" had caused production on \"The Big Bang Theory's\" eighth season to be postponed.", + "length": 149 + }, + { + "text": "The Hollywood Reporter confirmed the deal in its story and said co-stars Kunal Nayyar and Simon Helberg were still in negotiations, but expected to sign a deal soon.", + "length": 166 + }, + { + "text": "The new deals would put Parsons, Galecki and Cuoco in the same territory as the former cast of \"Friends,\" who also negotiated for pay raises at the height of their show's popularity.", + "length": 182 + }, + { + "text": "Quoting unnamed sources, the publication reports that Jim Parsons, Johnny Galecki and Kaley Cuoco have secured three-year deals for \"$1 million per episode for the 72 episodes the show is slated to produce in Seasons 8-10.", + "length": 222 + }, + { + "text": "Production had been scheduled to begin July 30 and on Tuesday the company released a statement saying \"Production on season 8 of 'The Big Bang Theory' will begin Wednesday, August 6, with 
contract negotiations now having been concluded.", + "length": 237 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.4244690239429474 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:54.296786071Z", + "first_section_created": "2025-12-23T09:33:54.298265626Z", + "last_section_published": "2025-12-23T09:33:54.298442833Z", + "all_results_received": "2025-12-23T09:33:54.362760252Z", + "output_generated": "2025-12-23T09:33:54.363013962Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:54.298265626Z", + "publish_time": "2025-12-23T09:33:54.298442833Z", + "first_worker_start": "2025-12-23T09:33:54.299006754Z", + "last_worker_end": "2025-12-23T09:33:54.361845Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:54.298939452Z", + "start_time": "2025-12-23T09:33:54.299009754Z", + "end_time": "2025-12-23T09:33:54.299044756Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:54.299263Z", + "start_time": "2025-12-23T09:33:54.299411Z", + "end_time": "2025-12-23T09:33:54.361845Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:54.298960853Z", + "start_time": "2025-12-23T09:33:54.299006754Z", + "end_time": "2025-12-23T09:33:54.299086857Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:54.299004854Z", + "start_time": "2025-12-23T09:33:54.299080857Z", + "end_time": "2025-12-23T09:33:54.299105558Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + 
"sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1645, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/00374fa05aac65564c93a58356abe3b7856e1794.json b/data/output/00374fa05aac65564c93a58356abe3b7856e1794.json new file mode 100644 index 0000000..cb9be47 --- /dev/null +++ b/data/output/00374fa05aac65564c93a58356abe3b7856e1794.json @@ -0,0 +1,716 @@ +{ + "file_name": "00374fa05aac65564c93a58356abe3b7856e1794.txt", + "total_words": 1386, + "top_n_words": [ + { + "word": "the", + "count": 84 + }, + { + "word": "to", + "count": 45 + }, + { + "word": "of", + "count": 35 + }, + { + "word": "a", + "count": 32 + }, + { + "word": "in", + "count": 25 + }, + { + "word": "s", + "count": 25 + }, + { + "word": "at", + "count": 22 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "heathrow", + "count": 15 + }, + { + "word": "athletes", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": 
"S.", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "If .", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Then .", + "length": 6 + }, + { + "text": "home .", + "length": 6 + }, + { + "text": "Smile!", + "length": 6 + }, + { + "text": "Instead .", + "length": 9 + }, + { + "text": "shopping.", + "length": 9 + }, + { + "text": "tomorrow.", + "length": 9 + }, + { + "text": "How many?", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "from London.", + "length": 12 + }, + { + "text": "22-year-old.", + "length": 12 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Not over yet?", + "length": 13 + }, + { + "text": "champions too.", + "length": 14 + }, + { + "text": "David Wilkes .", + "length": 14 + }, + { + "text": "departure dash.", + "length": 15 + }, + { + "text": "Cue plenty of .", + "length": 15 + }, + { + "text": "Farewell London!", + "length": 16 + }, + { + "text": "Kicking back: U.", + "length": 16 + }, + { + "text": "’ Not so upbeat?", + "length": 18 + }, + { + "text": "‘Hello, Malaysia!", + "length": 19 + }, + { + "text": "‘It’s been an .", + "length": 19 + }, + { + "text": "Is that an airport?", + "length": 19 + }, + { + "text": "to lift them again.", + "length": 19 + }, + { + "text": "That's more like it!", + "length": 20 + }, + { + "text": "pretend boarding it.", + "length": 20 + }, + { + "text": "gentle out of the ring.", + "length": 23 + }, + { + "text": "’ bellowed one from a .", + "length": 25 + }, + { + "text": "‘The Games were awesome.", + "length": 26 + }, + { + "text": "'I didn’t make it into .", + "length": 26 + }, + { + "text": "03:11 EST, 14 August 2012 .", + "length": 27 + }, + { + "text": "No aircraft depart from it.", + "length": 27 + }, + { + "text": "18:46 EST, 10 August 2012 .", + "length": 27 + }, + { + "text": "The people were so good to us.", + "length": 30 + }, + { + "text": "Nearby, a couple of Portuguese .", + "length": 32 + }, + 
{ + "text": "feel-good factor was everywhere.", + "length": 32 + }, + { + "text": "I bought all sorts of souvenirs.", + "length": 32 + }, + { + "text": "In here, you rub shoulders with .", + "length": 33 + }, + { + "text": "‘They are so cute,’ said the .", + "length": 34 + }, + { + "text": "It will also be in use today and .", + "length": 34 + }, + { + "text": "But otherwise, the temporary Games .", + "length": 36 + }, + { + "text": "The athletes are then clapped into .", + "length": 36 + }, + { + "text": "About 6,000 athletes and a further .", + "length": 36 + }, + { + "text": "terminal yesterday, its busiest day.", + "length": 36 + }, + { + "text": "‘The Olympics here has been so fine.", + "length": 38 + }, + { + "text": "No need, of course, for 4ft 11in Jun .", + "length": 38 + }, + { + "text": "Faux neo-classical columns arched the .", + "length": 39 + }, + { + "text": "Onward to security, where the conveyor .", + "length": 40 + }, + { + "text": "For the athletes, leaving London was a .", + "length": 40 + }, + { + "text": "adorned with iconic London street signs.", + "length": 40 + }, + { + "text": "‘She’s always been supportive of me .", + "length": 41 + }, + { + "text": "were pretending to ride on two model stags.", + "length": 43 + }, + { + "text": ": Sweden's Jenny Fransson, left, pictured .", + "length": 43 + }, + { + "text": "Taylor was boarding a flight back to Dublin.", + "length": 44 + }, + { + "text": "’ Russia’s Maria Savinova, 27, who won .", + "length": 44 + }, + { + "text": "Yesterday I went to Big Ben – so beautiful.", + "length": 45 + }, + { + "text": "‘It’s going to be a bit mad, so I’m a .", + "length": 45 + }, + { + "text": "traditional British event – the marathon queue.", + "length": 49 + }, + { + "text": "and has been living for this moment,’ Katie said.", + "length": 51 + }, + { + "text": "Pictured the famous rings above the arrivals board .", + "length": 52 + }, + { + "text": "Belgian judo Bronze medalist Charline van Snick at St.", + 
"length": 54 + }, + { + "text": "And there was no need to test your endurance in that .", + "length": 54 + }, + { + "text": "I have to say to Britain – you guys did a great job.", + "length": 54 + }, + { + "text": "Yesterday, Ireland’s gold medal-winning boxer Katie .", + "length": 55 + }, + { + "text": "sentry box, as some of that country’s divers strolled in.", + "length": 59 + }, + { + "text": "incredible Games and I have been privileged to be part of it.", + "length": 61 + }, + { + "text": "like a park (complete with more staff dressed as park wardens).", + "length": 63 + }, + { + "text": "can mingle with regular passengers as they wait for their flight.", + "length": 65 + }, + { + "text": "'All the volunteers were so friendly and gave us a lot of support.", + "length": 66 + }, + { + "text": "walk in the park (almost literally, given the terminal’s decor).", + "length": 66 + }, + { + "text": "the semi-finals but I had a great time, walking around London and .", + "length": 67 + }, + { + "text": "gold in the 800m, said: ‘It’s the best Olympics, I like London.", + "length": 67 + }, + { + "text": "Hoong Cheong, from Malaysia’s 3m springboard diving team, to duck.", + "length": 68 + }, + { + "text": "Terminal (basically a huge tent the size of three Olympic swimming .", + "length": 68 + }, + { + "text": "Pancras International in London ready to jump on the Eurostar home .", + "length": 68 + }, + { + "text": "back of a double-decker Routemaster bus (Number: 2012; Destination: .", + "length": 69 + }, + { + "text": "pools) meant it was a day for ‘personal bests’ all round in the .", + "length": 69 + }, + { + "text": "Procession: Actors lead a member of the Yemen team through Heathrow .", + "length": 69 + }, + { + "text": "luck in their future sporting endeavours in one last golden goodbye .", + "length": 69 + }, + { + "text": "9,000 Olympic coaches and officials were expected through the Games .", + "length": 69 + }, + { + "text": "Relaxing: Gatlin reclines as he 
waits for his flight back to the USA .", + "length": 70 + }, + { + "text": "athletes were delighted by the near life-size cardboard cutout of the .", + "length": 71 + }, + { + "text": "London) and snapped away on their cameras as they took it in turns to .", + "length": 71 + }, + { + "text": "bit nervous,’ she said at the prospect of the almighty craic set to .", + "length": 71 + }, + { + "text": "The Dutch team gathered at St Pancras as they waited for the Eurostar .", + "length": 71 + }, + { + "text": "athletes’ route through to an astro-turfed lounge decked out to look .", + "length": 72 + }, + { + "text": "belts through the scanners were decorated to look like brick walls and .", + "length": 72 + }, + { + "text": "with a team mate and German boxing coach Valentin Silaghi, right, pose .", + "length": 72 + }, + { + "text": "up for photos at the Games Terminal at Heathrow Airport before heading .", + "length": 72 + }, + { + "text": "Tumua Anae, 23, of the US’s gold medal-winning water polo team, said: .", + "length": 73 + }, + { + "text": "athletes take an airside coach to their final departure point where they .", + "length": 74 + }, + { + "text": "anyone’s spirits did flag, there were guardsmen in bearskins ever ready .", + "length": 75 + }, + { + "text": "The Paralympics will take place in the Olympic Park over the coming weeks .", + "length": 75 + }, + { + "text": "French athletes got in the spirit of things at St Pancras before departing .", + "length": 76 + }, + { + "text": "off she went for more photos, this time with another two ‘guardsmen’ who .", + "length": 78 + }, + { + "text": "each terminal by a ‘guard of honour’ of Heathrow volunteers to wish them .", + "length": 78 + }, + { + "text": "Busy day: More than 100,000 people are attempting to leave the capital today .", + "length": 78 + }, + { + "text": "erupt on her return to the Emerald Isle, adding with a smile: ‘I’m quite .", + "length": 78 + }, + { + "text": "Join the queue: Passengers wait in line 
to catch the Eurostar as the exodus begins .", + "length": 84 + }, + { + "text": "Ghost town: Olympic Park looks like a different place as there isn't a soul in sight .", + "length": 86 + }, + { + "text": "To see the athletes off from Heathrow, the airport set up a make-shift Games terminal .", + "length": 87 + }, + { + "text": "Golden girl: Ireland Gold medallist Katie Taylor seemed to be enjoying the new terminal .", + "length": 89 + }, + { + "text": "Members of the French team seemed to be sleeping off the closing ceremony at St Pancras .", + "length": 89 + }, + { + "text": "Farewell: A specially-constructed Games terminal at Heathrow was in full operation today .", + "length": 90 + }, + { + "text": "Ranomi Kromowidjojo shows her two gold and one silver swimming medals one last time in the UK .", + "length": 95 + }, + { + "text": "Justin Gatlin arrived at Heathrow (left) while France's Marlene Harnois waved the flag at St Pancras .", + "length": 102 + }, + { + "text": "’ Time to go: The Russian rhythmic gymnastics team arrived in Heathrow before they left the capital .", + "length": 103 + }, + { + "text": "photos for the scrapbook as they each posed with the ‘soldier’ (actually, like the others, an actor).", + "length": 105 + }, + { + "text": "’ New look: Athletes began to arrive to leave London at the temporary Games Terminal at Heathrow Airport .", + "length": 108 + }, + { + "text": "' She could not wait to see her family, especially her grandmother Kathleen Cranley, who has just turned 80.", + "length": 108 + }, + { + "text": "If they awarded Olympic golds for ‘heaviest bags under the eyes’, there would have been many contenders.", + "length": 108 + }, + { + "text": "There were long queues at St Pancras for the Eurostar, but the Dutch team (right) didn't seem to mind too much .", + "length": 112 + }, + { + "text": "Memories: A member of the Kazakhstan contingent takes a picture of a tree with Games moments hung on its branches .", + "length": 115 + }, + { + 
"text": "swimmer Ryan Lochte tweeted: 'It's been fun London but now it's time to get back to the good ole USA --lunch anyone??", + "length": 117 + }, + { + "text": "The softly spoken 26-year-old she was looking forward to going home after the ‘most incredible two weeks of my life’.", + "length": 121 + }, + { + "text": "Magical moment: Katie Taylor's father, Pete, left a touching message in the Games terminal before flying back to Ireland .", + "length": 122 + }, + { + "text": "Out on a high: Members of the Netherlands' women's hockey team brandished their gold medals at St Pancras station before they departed .", + "length": 136 + }, + { + "text": "Big send off: Athletes and Coaches from Sweden in the Olympic-themed Games Terminal, complete with fake grass and deer, at Heathrow Airport .", + "length": 141 + }, + { + "text": "Long time ago: It has been just three weeks since thousands of athletes landed at Heathrow for the Olympic Games on Monday they will head home.", + "length": 143 + }, + { + "text": "A lot of kit to carry: Olympic athletes departed from a specially built terminal at London Heathrow Airport to help cope during the busy period .", + "length": 145 + }, + { + "text": "A job well done: Gamesmaker volunteers walk out of the departure lounge at Heathrow's Terminal 4 after seeing off Athletes and Olympic officials .", + "length": 146 + }, + { + "text": "Home time: Members of the Bulgarian Olympic Team arrive with their luggage at Heathrow Airport in London - ready to fly home at the end of the Olympic Games .", + "length": 158 + }, + { + "text": "Waiting game: Germany's Robert Harting, gold medal winner in the men's discus, waits for his baggage at Heathrow airport after his plane was canceled due to a technical failure .", + "length": 178 + }, + { + "text": "But aside from trying to stay awake after the greatest time of their lives, the international athletes heading home from Heathrow yesterday faced few of the  wearisome obstacles found in 
airports.", + "length": 197 + }, + { + "text": "Granted, some 6ft 8in-plus members of Russia’s men’s volleyball team were forced to stoop their way through the scanners; and those proudly wearing their medals were politely asked to take them off and place them in the plastic trays along with the rest of their belongings to be security checked.", + "length": 301 + }, + { + "text": "Finally home: Ireland's Olympic Medal winners, from left to right, John Joe Nevin, Paddy Barnes Michael Conlon, Cian O'Connor and Katie Taylor arrive with the rest of the Irish team at Dublin Airport, left, and Irish boxer Katie Taylor holding her Olympic gold medal with a flag out of the cockpit of the plane, right .", + "length": 319 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.49074944853782654 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:54.799195093Z", + "first_section_created": "2025-12-23T09:33:54.800851658Z", + "last_section_published": "2025-12-23T09:33:54.801242573Z", + "all_results_received": "2025-12-23T09:33:54.947214981Z", + "output_generated": "2025-12-23T09:33:54.947813605Z", + "total_processing_time_ms": 148, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 145, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:54.800851658Z", + "publish_time": "2025-12-23T09:33:54.801121569Z", + "first_worker_start": "2025-12-23T09:33:54.801601087Z", + "last_worker_end": "2025-12-23T09:33:54.901368Z", + "total_journey_time_ms": 100, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:54.801973402Z", + "start_time": "2025-12-23T09:33:54.802042405Z", + "end_time": "2025-12-23T09:33:54.802152709Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:54.802182Z", + "start_time": 
"2025-12-23T09:33:54.802428Z", + "end_time": "2025-12-23T09:33:54.901368Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 98 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:54.801783195Z", + "start_time": "2025-12-23T09:33:54.801872498Z", + "end_time": "2025-12-23T09:33:54.801976702Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:54.801537685Z", + "start_time": "2025-12-23T09:33:54.801601087Z", + "end_time": "2025-12-23T09:33:54.801753193Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:54.80116147Z", + "publish_time": "2025-12-23T09:33:54.801242573Z", + "first_worker_start": "2025-12-23T09:33:54.801952201Z", + "last_worker_end": "2025-12-23T09:33:54.945057Z", + "total_journey_time_ms": 143, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:54.801848097Z", + "start_time": "2025-12-23T09:33:54.801952201Z", + "end_time": "2025-12-23T09:33:54.802061705Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:54.80217Z", + "start_time": "2025-12-23T09:33:54.802342Z", + "end_time": "2025-12-23T09:33:54.945057Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 142 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:54.802065306Z", + "start_time": "2025-12-23T09:33:54.802103707Z", + "end_time": "2025-12-23T09:33:54.802194511Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:54.802345217Z", + "start_time": "2025-12-23T09:33:54.802453421Z", + "end_time": "2025-12-23T09:33:54.802491222Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + 
"worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 240, + "min_processing_ms": 98, + "max_processing_ms": 142, + "avg_processing_ms": 120, + "median_processing_ms": 142, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4046, + "slowest_section_id": 1, + "slowest_section_time_ms": 143 + } +} diff --git a/data/output/0037543aa62330c22c061890640534c044c30fc2.json b/data/output/0037543aa62330c22c061890640534c044c30fc2.json new file mode 100644 index 0000000..8b0c9cb --- /dev/null +++ b/data/output/0037543aa62330c22c061890640534c044c30fc2.json @@ -0,0 +1,456 @@ +{ + "file_name": "0037543aa62330c22c061890640534c044c30fc2.txt", + "total_words": 1031, + "top_n_words": [ + { + "word": "the", + "count": 88 + }, + { + "word": "to", + "count": 40 + }, + { + "word": "a", + "count": 24 + }, + { + "word": "of", + "count": 24 + }, + { + "word": "in", + "count": 19 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "at", + "count": 17 + }, + { + "word": "was", + "count": 16 + }, + { + "word": "were", + "count": 15 + }, + { + "word": "i", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "9.", + "length": 2 + }, 
+ { + "text": "By A.", + "length": 5 + }, + { + "text": "At around 7.", + "length": 12 + }, + { + "text": "Where has he gone?", + "length": 18 + }, + { + "text": "Hyderabad explosions .", + "length": 22 + }, + { + "text": "He has not returned so far.", + "length": 27 + }, + { + "text": "Srinivasa Rao in Hyderabad .", + "length": 28 + }, + { + "text": "\"He told me to wait for some time.", + "length": 34 + }, + { + "text": "He is also not responding to my calls.", + "length": 38 + }, + { + "text": "Both the bicycles were mangled completely.", + "length": 42 + }, + { + "text": "In the meantime, loud explosions took place.", + "length": 44 + }, + { + "text": "during the bomb blasts at a Hyderabad hospital .", + "length": 48 + }, + { + "text": "As I was walking through, I heard the explosions.", + "length": 49 + }, + { + "text": "Unofficially, 22 people were killed in the blasts.", + "length": 50 + }, + { + "text": "A fourth bomb was defused before it could detonate.", + "length": 51 + }, + { + "text": "Several people were seriously injured crying for help.", + "length": 54 + }, + { + "text": "\"It was then I realised that there was a bomb explosion.", + "length": 56 + }, + { + "text": "The state police department put the preliminary toll at 11.", + "length": 59 + }, + { + "text": "Both of us were planning to return home by an autorickshaw.", + "length": 59 + }, + { + "text": "I don't know what happened to him,\" he said, crying loudly.", + "length": 59 + }, + { + "text": "A bus shelter has been ripped to pieces by the bomb blast .", + "length": 59 + }, + { + "text": "The condition of several of them is said to be very serious.", + "length": 60 + }, + { + "text": "\"I was just going to Shirdi Sai Baba temple, a furlong away.", + "length": 60 + }, + { + "text": "A Railway Police officer and sniffer dog check baggage train .", + "length": 62 + }, + { + "text": "\"My brother went to the other side of the road to buy footwear.", + "length": 63 + }, + { + "text": "There was 
no official word on who was behind the blasts, either.", + "length": 64 + }, + { + "text": "\"It was all blood and gore in the area and the scene was horrific.", + "length": 66 + }, + { + "text": "The LPG cylinder at the snack kiosk also exploded due to the blast.", + "length": 67 + }, + { + "text": "passengers at the main railway station in Ahmedabad following the .", + "length": 67 + }, + { + "text": "The sound was so deafening that I could not understand what was going on.", + "length": 73 + }, + { + "text": "According to eye-witness accounts, the first blast took place at around 6.", + "length": 74 + }, + { + "text": "The upcoming Hyderabad Metro Rail Project work is also in full swing in the area.", + "length": 81 + }, + { + "text": "Immediately, I rushed to the spot only to find pieces of flesh strewn all around.", + "length": 81 + }, + { + "text": "I tried to shift as many people as possible to the nearby Omni Hospital,\" he said.", + "length": 82 + }, + { + "text": "Within minutes of the incident, the police reached the spot and cordoned off the area.", + "length": 86 + }, + { + "text": "Babu Rao, a local Lok Satta Party leader, was one of the first people to reach the spot.", + "length": 88 + }, + { + "text": "Horror in Hyderabad: People search for survivors after three bomb blasts strike Dilsukhnagar .", + "length": 94 + }, + { + "text": "\"I was standing on the other side of the road waiting for a bus when I heard the bomb explode.", + "length": 94 + }, + { + "text": "Distraught: A relative reacts at the Omini hospital as panic spread through the city following the blasts .", + "length": 107 + }, + { + "text": "I had to pick up courage and shift the bodies into private vehicles to be taken to Osmania Hospital,\" he said.", + "length": 110 + }, + { + "text": "Investigating officers use a sniffer dog as they inspect the site of an explosion at Dilsukh Nagar, in Hyderabad .", + "length": 114 + }, + { + "text": "The blasts took place at two places within a 
distance of 100-120 metres at Dilsukhnagar on the National Highway No.", + "length": 115 + }, + { + "text": "The intensity of the explosion was so strong that the glass panes of the adjacent shopping malls were broken to pieces.", + "length": 119 + }, + { + "text": "Caught in the blasts: The first explosion took place shortly before 7pm when office workers were making their way home .", + "length": 120 + }, + { + "text": "Raju, another eye-witness, said he had seen the mangled bodies of three girls at the bus stand, where the blast took place.", + "length": 123 + }, + { + "text": "While some of the injured were rushed to Osmania Hospital, others were taken to nearby corporate and other private hospitals.", + "length": 125 + }, + { + "text": "The area is an extremely busy commercial zone with several shopping malls, cinema theatres, hospitals and educational institutions.", + "length": 131 + }, + { + "text": "I jumped over the road divider and reached the spot only to find blood-soaked bodies strewn all around and others writhing in pain.", + "length": 131 + }, + { + "text": "Although no group claimed the responsibility for the blasts, the use of bombs mounted on cycles had the imprint of Indian Mujahideen.", + "length": 133 + }, + { + "text": "The bus stand where passengers were waiting to return to their homes was blown off completely, sending victims' body parts flying into the air.", + "length": 143 + }, + { + "text": "01pm, before passersby could realise what exactly had happened, there were two other loud explosions at a bus stand adjacent to Venkatadri theatre.", + "length": 147 + }, + { + "text": "Singh sanctioned Rs 2 lakh each to next of kin of those killed in the blasts and Rs 50,000 each to those seriously injured from the PM's Relief Fund.", + "length": 149 + }, + { + "text": "\" Lakshmaiah, a private employee, was asking the police at Dilsukhnagar, an hour after three bomb explosions ripped through the area on Thursday evening.", + "length": 153 + }, + 
{ + "text": "\"The condition of another seven to eight persons is said to be highly critical,\" Andhra Pradesh chief minister N Kiran Kumar Reddy told media late at night.", + "length": 156 + }, + { + "text": "58pm at a snack kiosk in front of Anand Tiffin Centre, an eatery located close to Konark theatre adjacent to the traffic circle, referred to as Rajiv Gandhi Chowk.", + "length": 163 + }, + { + "text": "Immediately, with the help of the others, we stopped a few auto rickshaws and sent the injured to the Yashoda Hospital,\" a local businessman Shivaji told Mail Today.", + "length": 165 + }, + { + "text": "Both locations are near the Dilsukhnagar bus terminus, which is a major junction carrying buses to the coastal Andhra region, as well as different parts of the city.", + "length": 165 + }, + { + "text": "Some of the victims who died in the explosion were identified as: Izad Ahmed, Vijaya Kumar, Rajasekhar, Ramulu, Mohammad Ali, Andalu, Krishnakanth, Rafi and Tirupati.", + "length": 166 + }, + { + "text": "\"We got the information that there were three more bombs planted in the locality and we immediately called for the bomb squad to search for them,\" a police official said.", + "length": 170 + }, + { + "text": "According to the preliminary investigation, the bombs were mounted on bicycles – one behind the bus stand and another near the Tiffin Centre, a kiosk selling tea and snacks.", + "length": 175 + }, + { + "text": "Lakshmaiah, who belonged to Timmapur village of Karimnagar district, said his brother Mahesh, 32, had crossed the road to reach a footwear shop adjacent to Venkatadri theatre.", + "length": 175 + }, + { + "text": "Strongly condemning the blasts in Hyderabad, Prime Minister Manmohan Singh tonight said those responsible for the dastardly act would not go unpunished as he appealed for peace.", + "length": 177 + }, + { + "text": "After a long lull, terrorists struck in Hyderabad with three bomb explosions in the thickly populated Dilsukhnagar area, 
killing at least 11 people and injuring more than 80 on Thursday evening.", + "length": 194 + }, + { + "text": "Two youths from Adilabad district, Vijay and Rajasekhar, who were being tutored for the sub-inspector recruitment test in Dilsukhnagar and were having snacks at a kiosk near Anand tiffin centre, died on the spot.", + "length": 212 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8005905449390411 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:55.301995054Z", + "first_section_created": "2025-12-23T09:33:55.302347168Z", + "last_section_published": "2025-12-23T09:33:55.302812986Z", + "all_results_received": "2025-12-23T09:33:55.39857963Z", + "output_generated": "2025-12-23T09:33:55.39882554Z", + "total_processing_time_ms": 96, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 95, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:55.302347168Z", + "publish_time": "2025-12-23T09:33:55.302618278Z", + "first_worker_start": "2025-12-23T09:33:55.303057995Z", + "last_worker_end": "2025-12-23T09:33:55.397844Z", + "total_journey_time_ms": 95, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:55.303149799Z", + "start_time": "2025-12-23T09:33:55.303230302Z", + "end_time": "2025-12-23T09:33:55.303334306Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:55.303519Z", + "start_time": "2025-12-23T09:33:55.303657Z", + "end_time": "2025-12-23T09:33:55.397844Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 94 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:55.303051295Z", + "start_time": "2025-12-23T09:33:55.303140399Z", + "end_time": "2025-12-23T09:33:55.303294205Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + 
"worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:55.302973892Z", + "start_time": "2025-12-23T09:33:55.303057995Z", + "end_time": "2025-12-23T09:33:55.303136198Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:55.302704282Z", + "publish_time": "2025-12-23T09:33:55.302812986Z", + "first_worker_start": "2025-12-23T09:33:55.3031759Z", + "last_worker_end": "2025-12-23T09:33:55.394524Z", + "total_journey_time_ms": 91, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:55.303258203Z", + "start_time": "2025-12-23T09:33:55.303287204Z", + "end_time": "2025-12-23T09:33:55.303311905Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:55.303505Z", + "start_time": "2025-12-23T09:33:55.303646Z", + "end_time": "2025-12-23T09:33:55.394524Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 90 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:55.303204901Z", + "start_time": "2025-12-23T09:33:55.303237802Z", + "end_time": "2025-12-23T09:33:55.303271104Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:55.303139299Z", + "start_time": "2025-12-23T09:33:55.3031759Z", + "end_time": "2025-12-23T09:33:55.3031849Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 184, + "min_processing_ms": 90, + "max_processing_ms": 94, + 
"avg_processing_ms": 92, + "median_processing_ms": 94, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3002, + "slowest_section_id": 0, + "slowest_section_time_ms": 95 + } +} diff --git a/data/output/00377ab9d3caafb18464c47d0535ae2781aeef15.json b/data/output/00377ab9d3caafb18464c47d0535ae2781aeef15.json new file mode 100644 index 0000000..7d994ef --- /dev/null +++ b/data/output/00377ab9d3caafb18464c47d0535ae2781aeef15.json @@ -0,0 +1,432 @@ +{ + "file_name": "00377ab9d3caafb18464c47d0535ae2781aeef15.txt", + "total_words": 1119, + "top_n_words": [ + { + "word": "the", + "count": 66 + }, + { + "word": "in", + "count": 29 + }, + { + "word": "a", + "count": 27 + }, + { + "word": "and", + "count": 27 + }, + { + "word": "we", + "count": 24 + }, + { + "word": "of", + "count": 23 + }, + { + "word": "to", + "count": 22 + }, + { + "word": "that", + "count": 21 + }, + { + "word": "have", + "count": 19 + }, + { + "word": "said", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "The U.", + "length": 6 + }, + { + "text": "Watch Dr.", + "length": 9 + }, + { + "text": "and worldwide » .", + "length": 18 + }, + { + "text": "\"It's a long journey.", + "length": 21 + }, + { + "text": "Learn about the virus » .", + "length": 26 + }, + { + "text": 
"View images of responses in U.", + "length": 30 + }, + { + "text": "Sanjay Gupta demystify pandemics » .", + "length": 37 + }, + { + "text": "\" Go behind the scenes at the CDC » .", + "length": 39 + }, + { + "text": "Denmark did not provide further details.", + "length": 40 + }, + { + "text": "See where cases have been confirmed » .", + "length": 40 + }, + { + "text": "\"Of course we would like to have a vaccine tomorrow.", + "length": 52 + }, + { + "text": "We would have wanted to have it yesterday,\" she said.", + "length": 53 + }, + { + "text": "in academics, graduations and sports because of the flu.", + "length": 56 + }, + { + "text": "\"We're doing the best we can as fast as we can,\" he said.", + "length": 57 + }, + { + "text": "\" The WHO said Mexico has 156 confirmed cases and nine deaths.", + "length": 62 + }, + { + "text": "Thirteen countries have confirmed cases, the organization said.", + "length": 63 + }, + { + "text": "Watch how Mexican authorities are dealing with the outbreak » .", + "length": 64 + }, + { + "text": "And what I can say is that we are heading in the right direction.", + "length": 65 + }, + { + "text": "And Alabama has stopped such competitions until at least Tuesday.", + "length": 65 + }, + { + "text": "Australia, which has had no confirmed cases, was investigating 114.", + "length": 67 + }, + { + "text": "Marie-Paule Kieny, WHO director of the Initiative for Vaccine Research.", + "length": 71 + }, + { + "text": "The tweaked virus will be shipped to manufacturers, who will fine-tune it.", + "length": 74 + }, + { + "text": "Tourists sunbathe wearing surgical masks in the popular Mexican resort of Acapulco.", + "length": 83 + }, + { + "text": "Texas school officials have postponed all interscholastic sports until at least May 11.", + "length": 87 + }, + { + "text": "Then come more tests before national regulatory agencies decide whether to approve a vaccine.", + "length": 93 + }, + { + "text": "\" She said there is \"no doubt\" that 
a vaccine can be made \"in a relatively short period of time.", + "length": 96 + }, + { + "text": "Meanwhile, researchers worked to develop a vaccine for swine flu, which is also known as 2009 H1N1.", + "length": 99 + }, + { + "text": "\" \"So I just want everybody to be clear that this is why this is a cause for concern, but not alarm.", + "length": 100 + }, + { + "text": "CDC officials at a news conference Friday were asked to compare the strain with the deadly 1918 virus.", + "length": 102 + }, + { + "text": "An additional 230 cases are being investigated in the United Kingdom, and Spain has 84 suspected cases.", + "length": 103 + }, + { + "text": "As researchers work, at least one politician at the epicenter of the outbreak expressed optimism Friday.", + "length": 104 + }, + { + "text": "\" CNN's Karl Penhaul, Diana Magnay, Jake Perez, Saeed Ahmed, Umaro Djau and Nicole Saidi contributed to this report.", + "length": 116 + }, + { + "text": "Department of Education said Friday that 433 public and nonpublic schools in 17 states had been closed because of the flu outbreak.", + "length": 131 + }, + { + "text": "Yet it would take four to six months from the time the appropriate strain is identified before the first doses become available, said Dr.", + "length": 137 + }, + { + "text": "Secretary of Education Arne Duncan noted in a news conference that the number is less than 1 percent of the nation's approximate 100,000 schools.", + "length": 145 + }, + { + "text": "One death in the United States has been attributed to swine flu -- a toddler from Mexico whose family brought him to Texas for medical treatment.", + "length": 145 + }, + { + "text": "Mexican authorities say they have confirmed 16 deaths and at least 358 cases, and they suspect more than 150 deaths may have been caused by the flu.", + "length": 148 + }, + { + "text": "Hong Kong health officials said a patient who is being treated there arrived from Mexico on a China Eastern Airlines flight that stopped in 
Shanghai.", + "length": 149 + }, + { + "text": "The World Health Organization said Friday that the number of confirmed cases stood at 367 worldwide, including 141 in the United States and 156 in Mexico.", + "length": 154 + }, + { + "text": "The Centers for Disease Control and Prevention hopes to have a vaccine to manufacturers within a month, said Michael Shaw, lab team leader for the H1N1 response at the CDC.", + "length": 172 + }, + { + "text": "In a Cabinet meeting, President Obama on Friday praised the \"extraordinary\" government response to the virus but emphasized that \"we also need to prepare for the long term.", + "length": 172 + }, + { + "text": "\" The steps involved in producing a vaccine involve isolating a strain of the virus, which has already been done, and tweaking it so manufacturers can make a vaccine, Kieny said.", + "length": 178 + }, + { + "text": "GENEVA, Switzerland (CNN) -- The number of confirmed swine flu cases across the globe kept rising Friday, but some signs of hope emerged in the battle against the worldwide outbreak.", + "length": 182 + }, + { + "text": "\"We do have a problem, but I say this so that we know where we are as a city after we have done all we have done, and in what direction we are heading and how much we have progressed.", + "length": 183 + }, + { + "text": "He said there are indications from Mexico that \"relatively young, healthy people\" have died rather than people whose immune systems are compromised, and \"that's why we're taking it seriously.", + "length": 191 + }, + { + "text": "However, she added, \"We know there's a great deal that we do not yet understand about the virulence of the 1918 virus or other influenza viruses that have a more severe clinical picture in humans.", + "length": 196 + }, + { + "text": "For example, 22 students Slippery Rock University in Pennsylvania who just returned from from a five-week trip to Mexico City will get their diplomas at a separate ceremony when they graduate 
Saturday.", + "length": 201 + }, + { + "text": "\"What we have found by looking very carefully at the sequences of the new H1N1 virus is that we do not see the markers for virulence that were seen in the 1918 virus,\" said Nancy Cox, chief of the CDC's Influenza Division.", + "length": 222 + }, + { + "text": "Earlier Friday, United Flight 903 was diverted to Boston, Massachusetts, on Friday after a female passenger started complaining of \"flu-like\" symptoms on a Munich-to-Washington flight, Logan Airport spokesman Phil Orendella said.", + "length": 229 + }, + { + "text": "We are essentially ensuring that, in the worst-case scenario, we can manage this appropriately, government working with businesses and individuals, the private sector, and containing an outbreak, and that we can, ultimately, get through this.", + "length": 242 + }, + { + "text": "\" In addition to the confirmed H1N1 cases in Mexico and the United States, Canada has 34; Spain has 13; United Kingdom has 8; New Zealand and Germany each have 4; Israel has 2; Austria, China, Denmark, Netherlands and Switzerland each have one, according to the WHO.", + "length": 266 + }, + { + "text": "Authorities in Mexico are \"beginning to see evidence that the [virus] might be letting up, and the number of people who have been hospitalized has leveled out in regards to people who are contagious, at least as of yesterday,\" Mexico City Mayor Marcelo Ebrard told reporters.", + "length": 275 + }, + { + "text": "\" \"Since we know that these kinds of threats can emerge at any moment, even if it turns out that the H1N1 is relatively mild on the front end, it could come back in a more virulent form during the actual flu season, and that's why we are investing in our public health infrastructure.", + "length": 284 + }, + { + "text": "The CDC gave the following state-by-state breakdown of the 141 confirmed H1N1 cases in the United States: Arizona, 4; California, 13; Colorado, 2; Delaware, 4; Illinois, 3; Indiana, 3; 
Kansas, 2; Kentucky, 1; Massachusetts, 2; Michigan, 2; Minnesota, 1; Nebraska, 1; Nevada, 1; New Jersey, 5; New York, 50; Ohio, 1; South Carolina, 16; Texas, 28; and Virginia, 2.", + "length": 363 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5886473953723907 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:55.803628569Z", + "first_section_created": "2025-12-23T09:33:55.80391348Z", + "last_section_published": "2025-12-23T09:33:55.804277394Z", + "all_results_received": "2025-12-23T09:33:55.897609744Z", + "output_generated": "2025-12-23T09:33:55.897849153Z", + "total_processing_time_ms": 94, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 93, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:55.80391348Z", + "publish_time": "2025-12-23T09:33:55.804144489Z", + "first_worker_start": "2025-12-23T09:33:55.804646708Z", + "last_worker_end": "2025-12-23T09:33:55.889956Z", + "total_journey_time_ms": 86, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:55.804660609Z", + "start_time": "2025-12-23T09:33:55.804737112Z", + "end_time": "2025-12-23T09:33:55.804844016Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:55.804901Z", + "start_time": "2025-12-23T09:33:55.805043Z", + "end_time": "2025-12-23T09:33:55.889956Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 84 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:55.804636708Z", + "start_time": "2025-12-23T09:33:55.804709011Z", + "end_time": "2025-12-23T09:33:55.804826015Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:55.804569205Z", + "start_time": "2025-12-23T09:33:55.804646708Z", + "end_time": 
"2025-12-23T09:33:55.80468911Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:33:55.80418219Z", + "publish_time": "2025-12-23T09:33:55.804277394Z", + "first_worker_start": "2025-12-23T09:33:55.804646808Z", + "last_worker_end": "2025-12-23T09:33:55.896664Z", + "total_journey_time_ms": 92, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:55.804736512Z", + "start_time": "2025-12-23T09:33:55.804814315Z", + "end_time": "2025-12-23T09:33:55.804842616Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:55.804924Z", + "start_time": "2025-12-23T09:33:55.80523Z", + "end_time": "2025-12-23T09:33:55.896664Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 91 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:55.804747012Z", + "start_time": "2025-12-23T09:33:55.804782814Z", + "end_time": "2025-12-23T09:33:55.804835416Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:55.804591606Z", + "start_time": "2025-12-23T09:33:55.804646808Z", + "end_time": "2025-12-23T09:33:55.804666609Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 175, + "min_processing_ms": 84, + "max_processing_ms": 91, + "avg_processing_ms": 87, + "median_processing_ms": 91, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": 
"topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3286, + "slowest_section_id": 1, + "slowest_section_time_ms": 92 + } +} diff --git a/data/output/0037acc8ecedb7b9ad3dbc87e229cec3eeea2bd6.json b/data/output/0037acc8ecedb7b9ad3dbc87e229cec3eeea2bd6.json new file mode 100644 index 0000000..1bb5263 --- /dev/null +++ b/data/output/0037acc8ecedb7b9ad3dbc87e229cec3eeea2bd6.json @@ -0,0 +1,242 @@ +{ + "file_name": "0037acc8ecedb7b9ad3dbc87e229cec3eeea2bd6.txt", + "total_words": 387, + "top_n_words": [ + { + "word": "the", + "count": 18 + }, + { + "word": "her", + "count": 17 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "she", + "count": 11 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "a", + "count": 8 + }, + { + "word": "on", + "count": 8 + }, + { + "word": "was", + "count": 8 + }, + { + "word": "of", + "count": 7 + }, + { + "word": "i", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Sara Malm .", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "17:36 EST, 17 April 2013 .", + "length": 26 + }, + { + "text": "17:16 EST, 17 April 2013 .", + "length": 26 + }, + { + "text": "Now, Ms Hormigos says she wants revenge on her former partner.", + "length": 62 + }, + { + "text": "Now I can laugh if they make jokes about the video,’ she said.", + "length": 64 + }, + { + "text": "’ ‘Everybody has been 
saying for months that I was going to do it.", + "length": 70 + }, + { + "text": "‘People would think I am made of ice, but it was horrible, although I had to get on with my life.", + "length": 99 + }, + { + "text": "A Spanish councillor who resigned after her home-made porn video went viral is posing topless to ‘shut up her critics’.", + "length": 123 + }, + { + "text": "Now the primary school teacher and mother-of-two wants to put it all in the past by appearing in next-to-nothing in a Spanish magazine.", + "length": 135 + }, + { + "text": "The clip quickly spread around the town of 6,500 people before it was posted on YouTube and spread on social networks across Spain in September.", + "length": 144 + }, + { + "text": "That'll teach em: Former councillor Olvido Hormigos, 42, who was forced to resign following the sex-tape leak, is now hitting back at her critics by taking her clothes off .", + "length": 173 + }, + { + "text": "Olvido Hormigos, 42, was forced to leave her position as a Socialist Workers Party electee in central Spain when a clip showing her pleasuring herself on a bed leaked online.", + "length": 174 + }, + { + "text": "Gracing the cover in nothing but a pair of see-through knickers and high heels, she told magazine Interviú, she wants to show the world that she can do ‘whatever she wants with her body’.", + "length": 192 + }, + { + "text": "‘All I want is that the recipient of the video who I dated for three months and that now walks around the town as usual, pays for it,’ she says in the interview accompanying the racy photos.", + "length": 194 + }, + { + "text": "Party representative: Olvido was a Socialist Workers Party councillor in Toledo, central Spain, when the clip she sent to her then-partner, showing her pleasuring herself on a bed, leaked online .", + "length": 196 + }, + { + "text": "Upon her resignation Ms Hormigos branded the leak 'an attack on her privacy,' saying she handed in her notice 'out of respect for her family and to her 
party, the PSOE,’ she told Spanish newspaper El Mundo.", + "length": 208 + }, + { + "text": "Ms Hormigos was forced to step down from her seat as councillor in Los Yébenes, Toledo, 80 miles south of Madrid, when the graphic footage she had sent to her footballer boyfriend was leaked on the internet.", + "length": 208 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.604281485080719 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:56.305256883Z", + "first_section_created": "2025-12-23T09:33:56.305591996Z", + "last_section_published": "2025-12-23T09:33:56.305782804Z", + "all_results_received": "2025-12-23T09:33:56.379624091Z", + "output_generated": "2025-12-23T09:33:56.379789098Z", + "total_processing_time_ms": 74, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 73, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:56.305591996Z", + "publish_time": "2025-12-23T09:33:56.305782804Z", + "first_worker_start": "2025-12-23T09:33:56.306365727Z", + "last_worker_end": "2025-12-23T09:33:56.378693Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:56.306373827Z", + "start_time": "2025-12-23T09:33:56.306435229Z", + "end_time": "2025-12-23T09:33:56.306474731Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:56.30659Z", + "start_time": "2025-12-23T09:33:56.306794Z", + "end_time": "2025-12-23T09:33:56.378693Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 71 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:56.306304524Z", + "start_time": "2025-12-23T09:33:56.306365727Z", + "end_time": "2025-12-23T09:33:56.306421429Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + 
"receive_time": "2025-12-23T09:33:56.306371727Z", + "start_time": "2025-12-23T09:33:56.30644813Z", + "end_time": "2025-12-23T09:33:56.306473931Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 71, + "min_processing_ms": 71, + "max_processing_ms": 71, + "avg_processing_ms": 71, + "median_processing_ms": 71, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2153, + "slowest_section_id": 0, + "slowest_section_time_ms": 73 + } +} diff --git a/data/output/00382a0e672202c7804fa3c706cc236fddf6ed4d.json b/data/output/00382a0e672202c7804fa3c706cc236fddf6ed4d.json new file mode 100644 index 0000000..17b530e --- /dev/null +++ b/data/output/00382a0e672202c7804fa3c706cc236fddf6ed4d.json @@ -0,0 +1,302 @@ +{ + "file_name": "00382a0e672202c7804fa3c706cc236fddf6ed4d.txt", + "total_words": 699, + "top_n_words": [ + { + "word": "the", + "count": 45 + }, + { + "word": "of", + "count": 22 + }, + { + "word": "to", + "count": 19 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "for", + "count": 13 + }, + { + 
"word": "as", + "count": 12 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "is", + "count": 10 + }, + { + "word": "s", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "co.", + "length": 3 + }, + { + "text": "uk .", + "length": 4 + }, + { + "text": "europcar.", + "length": 9 + }, + { + "text": "Watch the YouTube video here .", + "length": 30 + }, + { + "text": "For more information visit: www.", + "length": 32 + }, + { + "text": "The family ‘resemblance’ is therefore easy to see.", + "length": 54 + }, + { + "text": "Europcar is one of the leading mobility players in Europe.", + "length": 58 + }, + { + "text": "I need more practice but enjoyed seeing the creation of Uncle Kenny.", + "length": 68 + }, + { + "text": "Wilshere's 'uncle' is a former schoolboy footballer who now works as a plumber .", + "length": 80 + }, + { + "text": "' Mertesacker revealed it would require 'a big lad, [NBA champion] Dirk Nowitzki.", + "length": 81 + }, + { + "text": "Then, Wilshere reluctantly introduce ‘Uncle Kenny Wilshere’ (played by Grant Davis).", + "length": 88 + }, + { + "text": "Alex Oxlade-Chamberlain's face was also super-imposed onto MacIntosh's face for the film .", + "length": 90 + }, + { + "text": "Trudy, Per's 'sister', produces this cake as a tribute for Arsenal manager Arsene Wenger .", + "length": 90 + }, + { + "text": "Trudy always attends matches, but believes that baking is more challenging than football .", + "length": 90 + }, + { + "text": "Trudy, Per Mertesacker's 'sister', is a German baker, played by actress Lillian Schiffer .", + "length": 91 + }, + { + "text": "Darren is affectionately known as ‘The Shrub’ because he can stand still for long periods of time .", + "length": 103 + }, + { + "text": "I did drama at school and had to drop acting for football so quite enjoyed the chance to relive my past.", + "length": 104 + }, + { + "text": "' The Office's Ewen MacIntosh plays Alex 
Oxlade-Chamberlain's cousin Darren, portrayed as a plane spotter .", + "length": 107 + }, + { + "text": "' Wilshere added: 'I’ve done a bit of acting in the past but it’s not something that comes naturally to me.", + "length": 111 + }, + { + "text": "Kenny travels to The Emirates to watch his nephew in action but secretly harbours desires to play for the club .", + "length": 112 + }, + { + "text": "Joking about which of the three players was the most natural actor, Mertesacker said: 'Oh that would be me, for sure.", + "length": 117 + }, + { + "text": "Trudy always attends matches to watch her famous brother in action but believes that baking is more challenging than football.", + "length": 126 + }, + { + "text": "The short film also features ‘Darren Oxlade-Chamberlain’ (played by Ewen MacIntosh), the plane-spotting cousin of winger Alex.", + "length": 130 + }, + { + "text": "Talking about the acting experience to create the video, Oxlade-Chamberlain said: 'I enjoyed the chance to do a bit of acting in the video.", + "length": 139 + }, + { + "text": "I’ve done a bit of acting before but this video was something different, involved more expression and I felt comfortable and enjoyed the experience.", + "length": 150 + }, + { + "text": "' When asked who the players would like to play them in movies of their lives, Wilshere opted for Daniel Craig, Oxlade-Chamberlain suggested Denzel Washington .", + "length": 160 + }, + { + "text": "Present in over 130 countries, the group provides customers with one of the largest car rental networks through its own operators, franchisees and partnerships.", + "length": 160 + }, + { + "text": "And they not only appeared as themselves but also as their ‘relatives’, through their faces being digitally super-imposed onto the faces of the actors playing those parts.", + "length": 175 + }, + { + "text": "‘Face to Face with the Arsenal Family’ introduces larger-than-life ‘family’ members of each of the players, telling the story of their 
journey to matchday at The Emirates.", + "length": 179 + }, + { + "text": "Whilst Alex is a confident, outgoing member of his family, Darren is affectionately known as ‘The Shrub’ because he can stand still for long periods of time, key for plane spotting.", + "length": 185 + }, + { + "text": "Kenneth Wilshere, played by Grant Davis with Jack's face super-imposed, is the Arsenal star's 'uncle' Wilshere jokingly speaks of his embarrassment of his 'uncle' during the mockumentary .", + "length": 189 + }, + { + "text": "As well as ‘starring’ in the mockumentary, which follows the players’ make-believe relatives travelling to The Emirates on matchday, the players had input into the scripts and storyboards.", + "length": 194 + }, + { + "text": "The film has been released by Europcar, the Official Car and Van Rental partner to Arsenal, and begins with champion baker ‘Trudy Mertesacker’ (played by Lillian Schiffer) being introduced by her 'brother' Per.", + "length": 214 + }, + { + "text": "A former schoolboy footballer who now works as a plumber, as well as being a keen poet, Uncle Kenny travels to The Emirates to watch his star nephew in action but secretly harbours desires to play for the club himself.", + "length": 218 + }, + { + "text": "”' When asked who the players would like to play them in movies of their lives, Wilshere opted for Daniel Craig, Oxlade-Chamberlain suggested Denzel Washington, but said: 'It would be a bit of a stretch and he would be dropping down a peg or two.", + "length": 248 + }, + { + "text": "Arsenal stars Per Mertesacker, Jack Wilshere and Alex Oxlade-Chamberlain have swapped the football pitch for amateur dramatics, appearing in a 'mockumentary' video which sees actors - including Ewen Macintosh of The Office - take on the roles of their fictional relatives.", + "length": 272 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5318121910095215 + }, + "name_replaced": false, + "performance": { + "file_detection_time": 
"2025-12-23T09:33:56.806540484Z", + "first_section_created": "2025-12-23T09:33:56.806972401Z", + "last_section_published": "2025-12-23T09:33:56.807156909Z", + "all_results_received": "2025-12-23T09:33:56.871661531Z", + "output_generated": "2025-12-23T09:33:56.871813837Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:56.806972401Z", + "publish_time": "2025-12-23T09:33:56.807156909Z", + "first_worker_start": "2025-12-23T09:33:56.807802234Z", + "last_worker_end": "2025-12-23T09:33:56.870671Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:56.807725431Z", + "start_time": "2025-12-23T09:33:56.807802234Z", + "end_time": "2025-12-23T09:33:56.807893637Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:56.807988Z", + "start_time": "2025-12-23T09:33:56.808165Z", + "end_time": "2025-12-23T09:33:56.870671Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:56.807817834Z", + "start_time": "2025-12-23T09:33:56.807886537Z", + "end_time": "2025-12-23T09:33:56.807978941Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:56.807741531Z", + "start_time": "2025-12-23T09:33:56.807821235Z", + "end_time": "2025-12-23T09:33:56.807893337Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 
0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4288, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/00385e5382f413e0c4234ddf71d692ed1e89622f.json b/data/output/00385e5382f413e0c4234ddf71d692ed1e89622f.json new file mode 100644 index 0000000..af4a9ff --- /dev/null +++ b/data/output/00385e5382f413e0c4234ddf71d692ed1e89622f.json @@ -0,0 +1,238 @@ +{ + "file_name": "00385e5382f413e0c4234ddf71d692ed1e89622f.txt", + "total_words": 231, + "top_n_words": [ + { + "word": "the", + "count": 20 + }, + { + "word": "a", + "count": 8 + }, + { + "word": "s", + "count": 6 + }, + { + "word": "to", + "count": 6 + }, + { + "word": "whale", + "count": 6 + }, + { + "word": "in", + "count": 5 + }, + { + "word": "pilot", + "count": 5 + }, + { + "word": "and", + "count": 4 + }, + { + "word": "hamm", + "count": 4 + }, + { + "word": "it", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "The video starts out normal.", + "length": 28 + }, + { + "text": "Guys were pointing and yelling.", + "length": 31 + }, + { + "text": "That's one way to make a splash.", + "length": 32 + }, + { + "text": "The plane landed safely seconds later.", + 
"length": 38 + }, + { + "text": "(CNN) -- Wise men say to look before you leap.", + "length": 46 + }, + { + "text": "Later Hamm showed the pilot the video he shot.", + "length": 46 + }, + { + "text": "In Alaska, it's advisable to look before you land.", + "length": 50 + }, + { + "text": "Rare albino whale 'parades' off Australian coast .", + "length": 50 + }, + { + "text": "I thought, 'Oh something must be wrong,'\" Hamm told CNN.", + "length": 56 + }, + { + "text": "For a moment, it appeared the whale and plane would collide.", + "length": 60 + }, + { + "text": "Jetliner diverts to Pacific atoll, mechanical glitch blamed .", + "length": 61 + }, + { + "text": "But the pilot pulled up, getting just enough lift to avoid the mammal.", + "length": 70 + }, + { + "text": "\"All the sudden, the pilot advanced the throttle and I didn't know why.", + "length": 71 + }, + { + "text": "That something was a whale, a humpback, swimming just under the surface.", + "length": 72 + }, + { + "text": "But as the plane lowers, it's clear something is different about this approach.", + "length": 79 + }, + { + "text": "That's because, in Alaska, where seaplanes are common, you just might land on a whale.", + "length": 86 + }, + { + "text": "It was a mundane scene in the island community that's only accessible by boat or seaplane.", + "length": 90 + }, + { + "text": "Hamm said the pilot told him he didn't notice the whale; he reacted to the commotion on the shore.", + "length": 98 + }, + { + "text": "Last week in tiny, remote Angoon, Thomas Hamm was shooting video of a seaplane coming in for a landing.", + "length": 103 + }, + { + "text": "Right as the pilot pulled up, the whale breached, clearing his blowhole and drenching the plane's windshield.", + "length": 109 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.42973652482032776 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:57.307855187Z", + "first_section_created": 
"2025-12-23T09:33:57.308167699Z", + "last_section_published": "2025-12-23T09:33:57.308364607Z", + "all_results_received": "2025-12-23T09:33:57.377787521Z", + "output_generated": "2025-12-23T09:33:57.377909026Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:57.308167699Z", + "publish_time": "2025-12-23T09:33:57.308364607Z", + "first_worker_start": "2025-12-23T09:33:57.309062234Z", + "last_worker_end": "2025-12-23T09:33:57.374834Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:57.309012732Z", + "start_time": "2025-12-23T09:33:57.309067234Z", + "end_time": "2025-12-23T09:33:57.309092835Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:57.309205Z", + "start_time": "2025-12-23T09:33:57.30933Z", + "end_time": "2025-12-23T09:33:57.374834Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:57.309034333Z", + "start_time": "2025-12-23T09:33:57.309077535Z", + "end_time": "2025-12-23T09:33:57.309110836Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:57.308994931Z", + "start_time": "2025-12-23T09:33:57.309062234Z", + "end_time": "2025-12-23T09:33:57.309084135Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + 
"worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1295, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/0038852bafbb3df37963a424bca159c97e8a955f.json b/data/output/0038852bafbb3df37963a424bca159c97e8a955f.json new file mode 100644 index 0000000..b33375a --- /dev/null +++ b/data/output/0038852bafbb3df37963a424bca159c97e8a955f.json @@ -0,0 +1,238 @@ +{ + "file_name": "0038852bafbb3df37963a424bca159c97e8a955f.txt", + "total_words": 252, + "top_n_words": [ + { + "word": "the", + "count": 16 + }, + { + "word": "to", + "count": 11 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "a", + "count": 4 + }, + { + "word": "not", + "count": 4 + }, + { + "word": "s", + "count": 4 + }, + { + "word": "was", + "count": 4 + }, + { + "word": "zuma", + "count": 4 + }, + { + "word": "according", + "count": 3 + }, + { + "word": "at", + "count": 3 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "William Turvill .", + "length": 17 + }, + { + "text": "15:15 EST, 24 December 2013 .", + "length": 29 + }, + { + 
"text": "16:59 EST, 24 December 2013 .", + "length": 29 + }, + { + "text": "People just interpreted things their own way.", + "length": 45 + }, + { + "text": "He said members of the union were given the instructions on Friday.", + "length": 67 + }, + { + "text": "[Journalists] were told not to punt one side of the story to balance it.", + "length": 72 + }, + { + "text": "According to News24, SABC spokesman Kaizer Kganyago said: ‘There was no such instruction.", + "length": 91 + }, + { + "text": "SABC has been told not to broadcast calls for Jacob Zuma to stand down as South Africa president .", + "length": 98 + }, + { + "text": "SABC was reportedly told not to broadcast people booing Mr Zuma at Nelson Mandela's memorial service .", + "length": 102 + }, + { + "text": "According to City Press, the crowd booed his face every time it showed up on a big screen at the service.", + "length": 105 + }, + { + "text": "’ He added: ‘They spent a long time one side of the story without calling to get views from the other side.", + "length": 111 + }, + { + "text": "A spokesman from the country's Right2Know group said the interference 'makes a mockery of the principle of freedom of expression'.", + "length": 130 + }, + { + "text": "The claim, which is denied by the South African Broadcasting Corporation, was reported by Right2Know and fellow campaign group SOS Coalition.", + "length": 141 + }, + { + "text": "The South African public broadcaster has been warned not to report on calls for Jacob Zuma to step down as president, according to campaign groups.", + "length": 147 + }, + { + "text": "It was also reported earlier this month that broadcasts showing of booing of President Zuma at Nelson Mandela’s memorial service had been banned.", + "length": 147 + }, + { + "text": "’ Right2Know's Mark Weinberg claimed the groups had learned about the instruction through the Broadcast, Electronic, Media and Allied Workers’ Union.", + "length": 153 + } + ], + "sentiment": { + "sentiment": 
"negative", + "score": 0.7206720113754272 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:57.809119787Z", + "first_section_created": "2025-12-23T09:33:57.809488002Z", + "last_section_published": "2025-12-23T09:33:57.809718811Z", + "all_results_received": "2025-12-23T09:33:57.87798588Z", + "output_generated": "2025-12-23T09:33:57.878191588Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:57.809488002Z", + "publish_time": "2025-12-23T09:33:57.809718811Z", + "first_worker_start": "2025-12-23T09:33:57.810262432Z", + "last_worker_end": "2025-12-23T09:33:57.877006Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:57.810292933Z", + "start_time": "2025-12-23T09:33:57.810345935Z", + "end_time": "2025-12-23T09:33:57.810379536Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:57.810527Z", + "start_time": "2025-12-23T09:33:57.810681Z", + "end_time": "2025-12-23T09:33:57.877006Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:57.810250831Z", + "start_time": "2025-12-23T09:33:57.810311434Z", + "end_time": "2025-12-23T09:33:57.810351735Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:57.810193829Z", + "start_time": "2025-12-23T09:33:57.810262432Z", + "end_time": "2025-12-23T09:33:57.810282433Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1530, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/0038a07c8eb9cbf0261e8711236d05242f1a400b.json b/data/output/0038a07c8eb9cbf0261e8711236d05242f1a400b.json new file mode 100644 index 0000000..b71970e --- /dev/null +++ b/data/output/0038a07c8eb9cbf0261e8711236d05242f1a400b.json @@ -0,0 +1,218 @@ +{ + "file_name": "0038a07c8eb9cbf0261e8711236d05242f1a400b.txt", + "total_words": 255, + "top_n_words": [ + { + "word": "the", + "count": 18 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "is", + "count": 9 + }, + { + "word": "a", + "count": 8 + }, + { + "word": "of", + "count": 6 + }, + { + "word": "this", + "count": 5 + }, + { + "word": "ageing", + "count": 4 + }, + { + "word": "as", + "count": 4 + }, + { + "word": "cheeks", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "Worried about a crooked nose?", + "length": 29 + }, + { + "text": "Want youthful looking smooth cheeks?", + 
"length": 36 + }, + { + "text": "The Tsun nose straightener could be just what you need .", + "length": 56 + }, + { + "text": "This 'age liner cheek stretcher' is said to give the desired effect .", + "length": 69 + }, + { + "text": "This unusual looking face mask is described as an anti-ageing strecher face sauna .", + "length": 83 + }, + { + "text": "This mouthpiece is meant to firm up sagging facial skin around the mouth and chin area .", + "length": 88 + }, + { + "text": "The appearance of smile lines is said to be erased by using this Hourei lift bra for the face .", + "length": 95 + }, + { + "text": "To get the best results the instructions say 'to make vowel sounds out loud over and over again'.", + "length": 97 + }, + { + "text": "Users are advised to insert the mouthpiece and make mouth movements with it for three minutes a day.", + "length": 100 + }, + { + "text": "The Pupeko anti-ageing mouthpiece, which claims to tighten the cheeks as the user does breathing exercises .", + "length": 108 + }, + { + "text": "A Japanese company is offering a range of bizarre mouthpieces and contraptions to help reduce the signs of ageing.", + "length": 114 + }, + { + "text": "This somewhat scary looking mask is designed to be worn while doing facial exercises to tighten the face and cheeks .", + "length": 117 + }, + { + "text": "The website, Japan Trend Shop sells the items, which are said to tighten the cheeks while doing breathing exercise and therefore hiding wrinkles.", + "length": 145 + }, + { + "text": "The Japan Trend Shop website is known for selling a range of weird and wonderful products that are said to fight the signs of ageing and improve beauty.", + "length": 152 + }, + { + "text": "Also available is a device, which is billed as being able to ‘straighten your nose’ and a mask which claims will get rid of smile lines by acting as a bra.", + "length": 159 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5516864657402039 + }, + "name_replaced": 
false, + "performance": { + "file_detection_time": "2025-12-23T09:33:58.310533993Z", + "first_section_created": "2025-12-23T09:33:58.311011412Z", + "last_section_published": "2025-12-23T09:33:58.31122142Z", + "all_results_received": "2025-12-23T09:33:58.371104662Z", + "output_generated": "2025-12-23T09:33:58.371217566Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:58.311011412Z", + "publish_time": "2025-12-23T09:33:58.31122142Z", + "first_worker_start": "2025-12-23T09:33:58.311811943Z", + "last_worker_end": "2025-12-23T09:33:58.368626Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:58.311802943Z", + "start_time": "2025-12-23T09:33:58.311856045Z", + "end_time": "2025-12-23T09:33:58.311886546Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:58.312004Z", + "start_time": "2025-12-23T09:33:58.31212Z", + "end_time": "2025-12-23T09:33:58.368626Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:58.311842745Z", + "start_time": "2025-12-23T09:33:58.311894747Z", + "end_time": "2025-12-23T09:33:58.311942049Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:58.311751541Z", + "start_time": "2025-12-23T09:33:58.311811943Z", + "end_time": "2025-12-23T09:33:58.311825944Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1462, + "slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/0038b990f0c9babcf678ca6712f77db79825364e.json b/data/output/0038b990f0c9babcf678ca6712f77db79825364e.json new file mode 100644 index 0000000..3c8797b --- /dev/null +++ b/data/output/0038b990f0c9babcf678ca6712f77db79825364e.json @@ -0,0 +1,302 @@ +{ + "file_name": "0038b990f0c9babcf678ca6712f77db79825364e.txt", + "total_words": 674, + "top_n_words": [ + { + "word": "the", + "count": 40 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "were", + "count": 15 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "rugby", + "count": 11 + }, + { + "word": "said", + "count": 10 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "players", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": 
"Stewart Maclean .", + "length": 17 + }, + { + "text": "07:08 EST, 26 March 2012 .", + "length": 26 + }, + { + "text": "08:16 EST, 26 March 2012 .", + "length": 26 + }, + { + "text": "He said: 'It is something I will always remember.", + "length": 49 + }, + { + "text": "'Some of us took out two or three at a time and almost got into trouble ourselves.", + "length": 82 + }, + { + "text": "'Only eight of us and 21 desperate people with wide open eyes begging to come out.", + "length": 82 + }, + { + "text": "The South African Rugby Union today described the incident as a 'devastating tragedy'.", + "length": 86 + }, + { + "text": "'The next moment we just saw arms and hands in the air as the current swept them away.", + "length": 86 + }, + { + "text": "Six rugby players have drowned after being swept out to sea by freak currents in South Africa.", + "length": 94 + }, + { + "text": "He added: 'Everything was peaceful, the sea was calm and they were up to their chests in the water.", + "length": 99 + }, + { + "text": "' Tragedy: Motherwell Rugby Club confirmed the news about their players on its Facebook page today .", + "length": 100 + }, + { + "text": "The shaken coach said the athletes were enjoying a swim when strong currents dragged them out to sea.", + "length": 101 + }, + { + "text": "Some of the rescue team today described a scene of 'hell' as the swimmers were swept into the open water.", + "length": 105 + }, + { + "text": "' One player has been confirmed dead and five are missing presumed drowned after the South African tragedy .", + "length": 108 + }, + { + "text": "Rescuers said yesterday's tragedy happened after the rugby players became sucked up in a freak dangerous rip tide.", + "length": 114 + }, + { + "text": "The 15 people who were pulled alive from the waves were treated on the beach for shock and near-drowning symptoms.", + "length": 114 + }, + { + "text": "Sea rescuers said the rugby stars were among a group of 21 bathers caught up in the strong 
currents at midday on Sunday.", + "length": 120 + }, + { + "text": "Mr Gray said rescuers launched an extensive search for the missing rugby players but were forced to suspend the hunt at dusk.", + "length": 125 + }, + { + "text": "'All are believed to be aged in their early 20's and all five missing are believed to be members of the Motherwell Rugby Club.", + "length": 126 + }, + { + "text": "' The Motherwell Rugby Club today confirmed six of its players had died and said the men's families were still being informed.", + "length": 126 + }, + { + "text": "Officials said the players had gone for a swim in the water to cool off after a training session when powerful currents took hold.", + "length": 130 + }, + { + "text": "The athletes, who were training for a national competition next month, were dragged away from the shore alongside several other swimmers.", + "length": 137 + }, + { + "text": "Freak currents: The Motherwell Rugby Club players had been swimming off Bluewater Bay beach in Port Elizabeth when they were swept out to sea .", + "length": 143 + }, + { + "text": "Police confirmed an inquest had been opened into the tragedy and said the dead man's body had been sent for forensic examination by pathologists.", + "length": 145 + }, + { + "text": "Lifesaver Brendon Helm told South Africa's News24 website he felt helpless as he and eight colleagues realised they would be unable to save everyone.", + "length": 149 + }, + { + "text": "'To have their afternoon turned into a day of tragedy is shocking for the whole rugby community and our thoughts and prayers go out to their families.", + "length": 150 + }, + { + "text": "'An SA Air Force 15 Squadron Charlie Flight helicopter, carrying two Port Elizabeth NSRI rescue swimmers, has joined since first light in an ongoing search operation.", + "length": 166 + }, + { + "text": "National Sea Rescue Institute station commander Ian Gray said although 15 people were rescued after the tragedy, one has been confirmed dead and 
five are missing presumed drowned.", + "length": 179 + }, + { + "text": "One body has been found and five others are presumed drowned after players from Motherwell Rugby Club were caught up in strong rip tides on Port Elizabeth's Bluewater Bay beach on Sunday.", + "length": 187 + }, + { + "text": "Mr Gray added: 'An extensive search by three NSRI sea rescue craft, a rescue rubber-duck and rescue jet-ski and the emergency services helicopter has revealed no sign of the five missing men.", + "length": 191 + }, + { + "text": "The organisation's president Oregan Hoskins said: 'These young men were preparing to compete in a Saru Easter Tournament in Cape Town in a fortnight and were enjoying a carefree day on the beach with their team-mates.", + "length": 217 + }, + { + "text": "The team's manager Mcdisi Mazamba told how he had taken his players for their weekly training session at Port Elizabeth's Bluewater Bay beach, which lies on the Indian Ocean and is famous for its clean sand and safe swimming.", + "length": 225 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7448022961616516 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:58.811991101Z", + "first_section_created": "2025-12-23T09:33:58.813205949Z", + "last_section_published": "2025-12-23T09:33:58.813419657Z", + "all_results_received": "2025-12-23T09:33:58.876891839Z", + "output_generated": "2025-12-23T09:33:58.877061646Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:58.813205949Z", + "publish_time": "2025-12-23T09:33:58.813419657Z", + "first_worker_start": "2025-12-23T09:33:58.81398878Z", + "last_worker_end": "2025-12-23T09:33:58.876057Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": 
"2025-12-23T09:33:58.813921277Z", + "start_time": "2025-12-23T09:33:58.81398878Z", + "end_time": "2025-12-23T09:33:58.814064583Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:58.814227Z", + "start_time": "2025-12-23T09:33:58.81438Z", + "end_time": "2025-12-23T09:33:58.876057Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:58.813914577Z", + "start_time": "2025-12-23T09:33:58.81399418Z", + "end_time": "2025-12-23T09:33:58.814098984Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:58.813948378Z", + "start_time": "2025-12-23T09:33:58.814034581Z", + "end_time": "2025-12-23T09:33:58.814070783Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3867, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/0038f760e32cbfc2bff795dececb07e5609c2b58.json b/data/output/0038f760e32cbfc2bff795dececb07e5609c2b58.json new file mode 100644 index 0000000..1bfd952 --- /dev/null +++ b/data/output/0038f760e32cbfc2bff795dececb07e5609c2b58.json @@ -0,0 +1,322 @@ +{ + "file_name": "0038f760e32cbfc2bff795dececb07e5609c2b58.txt", + "total_words": 564, + "top_n_words": [ + { + "word": "the", + "count": 30 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "her", + "count": 20 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "she", + "count": 15 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "lynnwood", + "count": 8 + }, + { + "word": "was", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "M.", + "length": 2 + }, + { + "text": "M.", + "length": 2 + }, + { + "text": "M.", + "length": 2 + }, + { + "text": "M.", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "In .", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "happened.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Helen Pow .", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Last week, D.", + "length": 13 + }, + { + "text": "and her family ...", + "length": 18 + }, + { + "text": "filed a federal civil .", + "length": 23 + }, + { + "text": "16:39 EST, 11 June 2013 .", + "length": 25 + }, + { + "text": "16:32 EST, 11 June 2013 .", + "length": 25 + }, + { + "text": "Police Chief Steven Jensen.", + "length": 27 + }, + { + "text": "The woman, identified only as D.", + "length": 32 + }, + { + "text": "it she claims the detectives ignored .", + "length": 38 + }, + { + "text": "'s photograph and ID card in his wallet.", + 
"length": 40 + }, + { + "text": "She also accuses them of threatening to .", + "length": 41 + }, + { + "text": "Mason is still with the Lynnwood Police Department.", + "length": 51 + }, + { + "text": "called Cocoon House, when she insisted it did take place.", + "length": 57 + }, + { + "text": "her account of events and bullied her into saying the rape never .", + "length": 66 + }, + { + "text": "The facility and two of its employees are also named in the lawsuit.", + "length": 68 + }, + { + "text": "have her thrown out of her apartment, owned by an at-risk youth home .", + "length": 70 + }, + { + "text": "rights suit against the city of Lynnwood, Rittgarn, Mason and Lynnwood .", + "length": 72 + }, + { + "text": "Cocoon House CEO Cassie Franklin told the Seattle Times this week: 'Our hearts go out to D.", + "length": 91 + }, + { + "text": "The victim, who now lives in Wyoming, and her lawyer also declined to comment to the Seattle Times.", + "length": 99 + }, + { + "text": "We strongly believe that Cocoon House and its employees acted appropriately on behalf of the client.", + "length": 100 + }, + { + "text": "O'Leary was convicted of five rapes, including the Lynnwood one, and is serving a 327-year sentence in a Colorado prison.", + "length": 121 + }, + { + "text": "in court documents, was gagged, bound and sexually assaulted at knife-point in her Lynnwood apartment by an intruder five years ago.", + "length": 132 + }, + { + "text": "She describes in the law suit the man, wielding a kitchen knife, warned her that he had taken photos of her and knew her name as he fled the apartment.", + "length": 151 + }, + { + "text": "Youth center: She has also named in the law suit Cocoon House, pictured, a youth facility that forced her to stand up in front of a group of teens and confess that she'd lied about the rape .", + "length": 191 + }, + { + "text": "Law suit: A rape victim is suing former Lynnwood, Washington, detective Jerry Rittgarn, pictured, for discounting her 
story and ignoring evidence supporting the attack by Marc O'Leary, right .", + "length": 192 + }, + { + "text": "She says the officers bullied her for hours, without a lawyer present, into recanting her account and, when she later tried to insist the rape did take place, she was charged $500 for false reporting.", + "length": 200 + }, + { + "text": "After the attack, she says she was forced to stand up in front of a group of teens at a Cocoon House-run program and confess that she'd lied about the rape, something which resulted in her having to see a councilor.", + "length": 215 + }, + { + "text": "According to the Seattle Times, two-and-a-half years later Lynnwood police reopened the investigation, after Marc O'Leary, a man wanted for a string of sex attacks in Washington state and Colorado, was found to have D.", + "length": 218 + }, + { + "text": "' Jensen refused to comment to the newspaper because of the litigation and Rittgarn, who has since left the Lynnwood Police Department and now lives in San Diego, said he was unaware of the lawsuit and couldn't remember the case.", + "length": 229 + }, + { + "text": "A Washington state woman who was a victim of a serial sex offender is suing police after they allegedly discounted her story, ignored evidence proving she was raped and then slapped her with a $500 fine for making a false statement.", + "length": 232 + }, + { + "text": "Then 18, she reported the attack but she says detectives Jerry Rittgarn and Sergeant Jeff Mason didn't believe her story, and ignored evidence including stained sheets, doctors reports detailing injuries to her wrists and abrasions on her genitals and DNA samples.", + "length": 264 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7522079348564148 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:59.31370992Z", + "first_section_created": "2025-12-23T09:33:59.315294582Z", + "last_section_published": "2025-12-23T09:33:59.315479389Z", 
+ "all_results_received": "2025-12-23T09:33:59.387309297Z", + "output_generated": "2025-12-23T09:33:59.387506105Z", + "total_processing_time_ms": 73, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 71, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:59.315294582Z", + "publish_time": "2025-12-23T09:33:59.315479389Z", + "first_worker_start": "2025-12-23T09:33:59.31601941Z", + "last_worker_end": "2025-12-23T09:33:59.385014Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:59.315986809Z", + "start_time": "2025-12-23T09:33:59.316060512Z", + "end_time": "2025-12-23T09:33:59.316142715Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:59.316188Z", + "start_time": "2025-12-23T09:33:59.316324Z", + "end_time": "2025-12-23T09:33:59.385014Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 68 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:33:59.315966308Z", + "start_time": "2025-12-23T09:33:59.31603171Z", + "end_time": "2025-12-23T09:33:59.316112414Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:59.315950607Z", + "start_time": "2025-12-23T09:33:59.31601941Z", + "end_time": "2025-12-23T09:33:59.316073512Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 68, + "min_processing_ms": 68, + 
"max_processing_ms": 68, + "avg_processing_ms": 68, + "median_processing_ms": 68, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3196, + "slowest_section_id": 0, + "slowest_section_time_ms": 69 + } +} diff --git a/data/output/0039212685276f03caa39f873a52a6758cd7ecd1.json b/data/output/0039212685276f03caa39f873a52a6758cd7ecd1.json new file mode 100644 index 0000000..5b5c87e --- /dev/null +++ b/data/output/0039212685276f03caa39f873a52a6758cd7ecd1.json @@ -0,0 +1,246 @@ +{ + "file_name": "0039212685276f03caa39f873a52a6758cd7ecd1.txt", + "total_words": 391, + "top_n_words": [ + { + "word": "the", + "count": 33 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "pritchard", + "count": 10 + }, + { + "word": "his", + "count": 8 + }, + { + "word": "chris", + "count": 7 + }, + { + "word": "i", + "count": 7 + }, + { + "word": "was", + "count": 7 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "on", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "Celebrate!", + "length": 10 + }, + { + "text": "'It was never in doubt,' she said.", + "length": 34 + }, + { + "text": "'I have been nagging for long enough.", + "length": 37 + }, + { + "text": "It has taken a lot of hard work,' he said.", + "length": 42 + }, + { + "text": "It was a really good keirin, I really enjoyed it.", + "length": 49 + }, + { + "text": "'I have been plotting this for about 
five or six months.", + "length": 56 + }, + { + "text": "Good decision: Pritchard punches the air after his proposal success but .", + "length": 73 + }, + { + "text": "The ring is just like the one I took a picture of and left on the computer screen!", + "length": 82 + }, + { + "text": "The fans in the Velodrome cheered as loud as they have all day for the big moment .", + "length": 83 + }, + { + "text": "VIDEO Scroll down to watch cyclist Chris Pritchard proposes at Commonwealth Games .", + "length": 83 + }, + { + "text": "Big moment: Chris Pritchard gets down on one knee to propose to his girlfriend in the Velodrome .", + "length": 97 + }, + { + "text": "But the 31-year-old later admitted that the proposal had been the end of a long thought-out plan.", + "length": 97 + }, + { + "text": "Secret plan: Pritchard waves to the crowd earlier in the day during the Keirin qualifying stages .", + "length": 98 + }, + { + "text": "' Victory: Pritchard slips the ring onto his new finacee's finger after she says yes to his proposal .", + "length": 102 + }, + { + "text": "Sealed with a kiss: Chris Pritchard and Amanda Ball after the proposal in the Sir Chris Hoy Velodrome .", + "length": 103 + }, + { + "text": "'I thought, \"she is going to say yes\" so I just put that away and I just thought about what was going on in the keirin.", + "length": 119 + }, + { + "text": "And it was lucky she did, after Pritchard could only finish third in a race to decide who was the seventh place finisher in the men's keirin.", + "length": 141 + }, + { + "text": "However, new fiancee Ball said she knew the question was coming at some point before Pritchard vaulted his way up the stairs in the Velodrome.", + "length": 142 + }, + { + "text": "He might not have had much luck on the track, but Sunday at the Commonwealth Games was still an occasion to remember for Scotland's Chris Pritchard.", + "length": 148 + }, + { + "text": "Fortunately she said yes - and that decision led to celebrations in the 
Sir Chris Hoy Velodrome that rivaled anything from the day's track cycling action.", + "length": 154 + }, + { + "text": "' Hugo Barrette of Canada came out on top in that contest while Matthew Glaetzer won the gold later in the evening on the last day in the Sir Chris Hoy Velodrome.", + "length": 162 + }, + { + "text": "Keirin rider Pritchard leapt from his bike after losing out in race to decide the final standings, before climbing into the stands, still wearing his lyrca race gear, to propose to his girlfriend Amanda Ball.", + "length": 208 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.38221752643585205 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:33:59.816304872Z", + "first_section_created": "2025-12-23T09:33:59.818414955Z", + "last_section_published": "2025-12-23T09:33:59.818586561Z", + "all_results_received": "2025-12-23T09:33:59.881442819Z", + "output_generated": "2025-12-23T09:33:59.881587125Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:33:59.818414955Z", + "publish_time": "2025-12-23T09:33:59.818586561Z", + "first_worker_start": "2025-12-23T09:33:59.819184385Z", + "last_worker_end": "2025-12-23T09:33:59.878923Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:33:59.819255487Z", + "start_time": "2025-12-23T09:33:59.81931769Z", + "end_time": "2025-12-23T09:33:59.819354591Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:33:59.819359Z", + "start_time": "2025-12-23T09:33:59.819486Z", + "end_time": "2025-12-23T09:33:59.878923Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": 
"2025-12-23T09:33:59.819145683Z", + "start_time": "2025-12-23T09:33:59.819210586Z", + "end_time": "2025-12-23T09:33:59.819266388Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:33:59.819113482Z", + "start_time": "2025-12-23T09:33:59.819184385Z", + "end_time": "2025-12-23T09:33:59.819216486Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2139, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/0039410ebf5a3919dc8d41f2a368b65512b4be14.json b/data/output/0039410ebf5a3919dc8d41f2a368b65512b4be14.json new file mode 100644 index 0000000..6b2efac --- /dev/null +++ b/data/output/0039410ebf5a3919dc8d41f2a368b65512b4be14.json @@ -0,0 +1,242 @@ +{ + "file_name": "0039410ebf5a3919dc8d41f2a368b65512b4be14.txt", + 
"total_words": 332, + "top_n_words": [ + { + "word": "to", + "count": 12 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "social", + "count": 9 + }, + { + "word": "the", + "count": 9 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "use", + "count": 7 + }, + { + "word": "media", + "count": 6 + }, + { + "word": "or", + "count": 6 + }, + { + "word": "for", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "04:48 EST, 16 April 2013 .", + "length": 26 + }, + { + "text": "20:20 EST, 15 April 2013 .", + "length": 26 + }, + { + "text": "But using social media provided even greater benefits, said researcher Dr Anja Leist.", + "length": 85 + }, + { + "text": "Using an iPad, smartphone or computer can be a boost for older people, researchers say .", + "length": 88 + }, + { + "text": "For using an iPad, smartphone or computer can be a boost for older people, researchers say.", + "length": 91 + }, + { + "text": "Going online via an increasing range of easy-to-use devices gives access to a valuable source of support.", + "length": 105 + }, + { + "text": "If you want to help an elderly relative cope with ageing and illness, give them a tablet – of the hi-tech variety.", + "length": 116 + }, + { + "text": "The study ‘Social media use of older adults - A mini-review’ is published by the university’s research unit INSIDE.", + "length": 121 + }, + { + "text": "Although more research was needed, adverse affects could include access to harmful information and misuse of personal data.", + "length": 123 + }, + { + "text": "Becoming a silver surfer on Facebook, Twitter and other online social media networks helps combat disease and loneliness, a study found.", + "length": 136 + }, + { + 
"text": "It allows the elderly to use social networks, join discussion boards to beat loneliness, find out information about illnesses or exchange experiences.", + "length": 150 + }, + { + "text": "Other negative effects have been shown to be unfavourable social comparisons due to overly positive self-representations of others displayed in online social networks.", + "length": 167 + }, + { + "text": "Researchers from the University of Luxembourg found that merely using a computer or similar web-enabled device successfully helped older people feel better about themselves.", + "length": 173 + }, + { + "text": "’ Besides the potential for clinical practise and other positive consequences in everyday use of social media, the researchers also addressed the possible negative consequences of social media use.", + "length": 199 + }, + { + "text": "‘There are many online forums where people in difficult life situations, such as informal caregivers of a spouse with dementia or individuals with depression, can exchange thoughts as well as receive and provide support.", + "length": 222 + }, + { + "text": "Dr Leist, who aims to design websites that are easier for the elderly to use, said: ‘Older adults can use social media to access health-related information and engage in patient-to-patient or patient-doctor conversations.", + "length": 223 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.49753251671791077 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:00.319266339Z", + "first_section_created": "2025-12-23T09:34:00.319624853Z", + "last_section_published": "2025-12-23T09:34:00.319822161Z", + "all_results_received": "2025-12-23T09:34:00.387344001Z", + "output_generated": "2025-12-23T09:34:00.387499907Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": 
"2025-12-23T09:34:00.319624853Z", + "publish_time": "2025-12-23T09:34:00.319822161Z", + "first_worker_start": "2025-12-23T09:34:00.320350581Z", + "last_worker_end": "2025-12-23T09:34:00.386328Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:00.320364882Z", + "start_time": "2025-12-23T09:34:00.320416284Z", + "end_time": "2025-12-23T09:34:00.320461786Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:00.320608Z", + "start_time": "2025-12-23T09:34:00.320751Z", + "end_time": "2025-12-23T09:34:00.386328Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:00.32033008Z", + "start_time": "2025-12-23T09:34:00.320402683Z", + "end_time": "2025-12-23T09:34:00.320457585Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:00.320279078Z", + "start_time": "2025-12-23T09:34:00.320350581Z", + "end_time": "2025-12-23T09:34:00.320372182Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2120, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/0039459bc7655e0bb9dea5ddc3cc47a43fea891d.json b/data/output/0039459bc7655e0bb9dea5ddc3cc47a43fea891d.json new file mode 100644 index 0000000..776255d --- /dev/null +++ b/data/output/0039459bc7655e0bb9dea5ddc3cc47a43fea891d.json @@ -0,0 +1,250 @@ +{ + "file_name": "0039459bc7655e0bb9dea5ddc3cc47a43fea891d.txt", + "total_words": 665, + "top_n_words": [ + { + "word": "the", + "count": 42 + }, + { + "word": "a", + "count": 21 + }, + { + "word": "in", + "count": 19 + }, + { + "word": "australia", + "count": 16 + }, + { + "word": "france", + "count": 15 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "as", + "count": 11 + }, + { + "word": "with", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "Done: Australia celebrate winning the Series in a whitewash on France .", + "length": 71 + }, + { + "text": "Done: The match was a good as over by half-time as Australia stormed into a 20-6 lead .", + "length": 87 + }, + { + "text": "Prolific: Folau scored two of Australia's five tries in the 39-13 triumph over France .", + "length": 87 + }, + { + "text": "Stop: Australia's Wycliff Palu rides through a tackle from France's Mogan Parra in Sydney .", + "length": 91 + }, + { + "text": "Celebrate: The Australia team after they beat France in Sydney to win the three-Test series .", + "length": 93 + }, + { + "text": "Try: Israel Folau scores a try while Brice Dulin (right) attempts to stop him from 
doing so .", + "length": 93 + }, + { + "text": "Defeat: Sorry France could do little to respond to Australia's dominance, scoring a solitary try .", + "length": 98 + }, + { + "text": "A regular sight: Folau celebrates scoring his try during the third Test between Australia and France .", + "length": 102 + }, + { + "text": "Leader: Australia captain Michael Hooper holds up the Trophee des Bicentenaires after beating France .", + "length": 102 + }, + { + "text": "Keep it up: Australia's dominance stretched further into the second half as France fell at their feet .", + "length": 103 + }, + { + "text": "Foley's third-minute penalty set the hosts on their way and they were 10-0 ahead inside eight minutes thanks to newcomer Skelton.", + "length": 129 + }, + { + "text": "Israel Folau scored two of Australia's five tries in the triumph, with debutant Will Skelton, Michael Hooper and Nick Phipps also crossing.", + "length": 139 + }, + { + "text": "Australia wrapped up a series whitewash over France with a comprehensive 39-13 victory in the third and final Test at the Allianz Stadium in Sydney.", + "length": 148 + }, + { + "text": "That match was as good as over by half-time as Australia stormed into a 20-6 lead as they banished the memories of last weekend's scrappy 6-0 win in the second Test with an impressive showing.", + "length": 192 + }, + { + "text": "Flanker Hooper added a fourth Australia try on the hour mark after more quick handling and incisive running saw the Wallabies cut through the middle, with Foley adding the extras to make it 34-6.", + "length": 195 + }, + { + "text": "Foley added his second successful conversion to make it 20-3 and, although Maxime Machenaud hit back for France with a penalty just before half-time, the Wallabies were in total control at the break.", + "length": 199 + }, + { + "text": "Fly-half Bernard Foley added the rest of the points with four conversions and two penalties giving him a personal haul of 14 points as the Wallabies made 
it seven Tests in a row for the first time since 1999-2000.", + "length": 213 + }, + { + "text": "France full-back Brice Dulin got his side off the mark with a penalty but Foley responded with another three-pointer of his own and then Australia took advantage of the yellow card shown to Rabah Slimani to further increase their lead.", + "length": 235 + }, + { + "text": "Prop Slimani was sin-binned in the 27th minute for tackling without the ball and within a minute of his departure Australia claimed their second try, Folau stretching over in the corner after play had been quickly spread out to the right.", + "length": 238 + }, + { + "text": "The giant 22-year-old, who was drafted into starting line-up in place of former Wallabies captain James Horwill, showed his considerable power as he barged his way past the challenge of France flanker Fulgence Ouedraogo to crash over from 12 yards out.", + "length": 252 + }, + { + "text": "Sorry France could only respond with a solitary converted try from Guilhem Guirado in the second half plus a couple of penalties as they were convincing beaten - their 13th loss in 19 Tests and one that will increase the pressure on coach Philippe Saint-Andre.", + "length": 260 + }, + { + "text": "France finally had something to smile about in the 65th minute when they pulled a try back through Guirado, who was powered over by a driving maul, but Australia had the last word when replacement Phipps notched his maiden Test try late on after catching Les Bleus cold with a quickly-taken tap penalty.", + "length": 303 + }, + { + "text": "And that dominance continued in the second half with Ewen McKenzie's men further increasing their lead inside two minutes as Folau crossed for the second time in the match - and the 13th time in Tests - when he capitalised on good work from Skelton to storm through the heart of the France defence and touch down.", + "length": 313 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.5508027076721191 + 
}, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:00.820609942Z", + "first_section_created": "2025-12-23T09:34:00.820982557Z", + "last_section_published": "2025-12-23T09:34:00.821244567Z", + "all_results_received": "2025-12-23T09:34:00.884024422Z", + "output_generated": "2025-12-23T09:34:00.884195929Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:00.820982557Z", + "publish_time": "2025-12-23T09:34:00.821244567Z", + "first_worker_start": "2025-12-23T09:34:00.821814489Z", + "last_worker_end": "2025-12-23T09:34:00.883059Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:00.821845191Z", + "start_time": "2025-12-23T09:34:00.821925594Z", + "end_time": "2025-12-23T09:34:00.822002197Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:00.822017Z", + "start_time": "2025-12-23T09:34:00.822166Z", + "end_time": "2025-12-23T09:34:00.883059Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:00.821782888Z", + "start_time": "2025-12-23T09:34:00.821843691Z", + "end_time": "2025-12-23T09:34:00.821934094Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:00.821745487Z", + "start_time": "2025-12-23T09:34:00.821814489Z", + "end_time": "2025-12-23T09:34:00.82183959Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3765, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/003952f45c4ce03898213bb94e5865039ba33120.json b/data/output/003952f45c4ce03898213bb94e5865039ba33120.json new file mode 100644 index 0000000..896df76 --- /dev/null +++ b/data/output/003952f45c4ce03898213bb94e5865039ba33120.json @@ -0,0 +1,266 @@ +{ + "file_name": "003952f45c4ce03898213bb94e5865039ba33120.txt", + "total_words": 466, + "top_n_words": [ + { + "word": "the", + "count": 17 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "horse", + "count": 11 + }, + { + "word": "to", + "count": 11 + }, + { + "word": "for", + "count": 9 + }, + { + "word": "on", + "count": 9 + }, + { + "word": "princess", + "count": 9 + }, + { + "word": "anne", + "count": 8 + }, + { + "word": "as", + "count": 8 + }, + { + "word": "of", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + 
}, + { + "text": "Mia De Graaf .", + "length": 14 + }, + { + "text": "21:03 EST, 16 November 2013 .", + "length": 29 + }, + { + "text": "21:03 EST, 16 November 2013 .", + "length": 29 + }, + { + "text": "Princess Anne \u0026 a horse #bbcaq Plain wrong.", + "length": 43 + }, + { + "text": "'Brilliant, it's a brilliant idea… perfect.", + "length": 45 + }, + { + "text": "Listeners rushed to lambast the author for his comments.", + "length": 56 + }, + { + "text": "' Others attacked the BBC for not condemning Starkey's comments.", + "length": 64 + }, + { + "text": "' Another tweeted: 'BBC just re-broadcast David Starkey's misogyny re.", + "length": 70 + }, + { + "text": "In 2011 he was attacked for calling a child 'fat' on Jamie Oliver's show Dream School.", + "length": 86 + }, + { + "text": "Mocking: David Starkey (right) was blasted by listeners for saying Princess Anne (left) looks like a horse .", + "length": 108 + }, + { + "text": "Welfare: Former eventing champion Princess Anne said selling horse meat could be an incentive for good welfare .", + "length": 112 + }, + { + "text": "One person wrote on Twitter: 'What right does Starkey have to insult Princess Anne with personal remarks on her looks?", + "length": 118 + }, + { + "text": "She was addressing warnings of a horse welfare crisis, as 7,000 horses are currently at risk of abandonment and neglect.", + "length": 120 + }, + { + "text": "One said: 'Disgraceful that David Starkey's ref to Princess Anne resembling a horse went uncontested by the Chair - patriarchal sexism in action.", + "length": 145 + }, + { + "text": "'I had never thought of her as reading Swift, for example, on what you do with the problem of Ireland,' he added, referring to the satirical piece.", + "length": 147 + }, + { + "text": "Outspoken historian David Starkey has sparked outrage by saying Princess Anne looks like a horse after she suggested eating horse meat could be a good thing.", + "length": 157 + }, + { + "text": "Speaking at the 
World Horse Welfare charity conference in London, Anne said: 'Our attitudes to the horse meat trade and the value of horse meat may have to change.", + "length": 163 + }, + { + "text": "He has also branded the Queen a philistine who lacks an education, and told a Question Time audience we should not free oppressed countries - and shouldn't have helped to free the French.", + "length": 187 + }, + { + "text": "' His comments follow a speech the princess made at an animal welfare conference, claiming horse owners might take better care of them if they believed they could later sell them for meat.", + "length": 188 + }, + { + "text": "Speaking on BBC Radio 4's Any Questions panel show, he said: 'I never thought of Princess Anne who is much given to riding around on horses - looks rather like one generally - as a satirist.", + "length": 190 + }, + { + "text": "He went on to compare her idea to Jonathan Swift's iconic satirical essay A Modest Proposal, which said economically-troubled Irish people should sell their children as food for rich people.", + "length": 190 + }, + { + "text": "' The academic has previously been accused of racism after describing Scotland, Wales and Ireland as ‘feeble little countries’ on the BBC1 panel programme, and blaming ‘black culture’ for the riots that swept the country.", + "length": 229 + }, + { + "text": "He went on to mock the princess, a former eventing champion, saying: 'It is exactly the same point, as mad, as brilliant, as circular – presumably it comes from having that coronet so firmly thrust into your elaborately quaffed and lacquered hair.", + "length": 249 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6000081896781921 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:01.322287259Z", + "first_section_created": "2025-12-23T09:34:01.322590271Z", + "last_section_published": "2025-12-23T09:34:01.32283988Z", + "all_results_received": "2025-12-23T09:34:01.390825139Z", + 
"output_generated": "2025-12-23T09:34:01.390994745Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:01.322590271Z", + "publish_time": "2025-12-23T09:34:01.32283988Z", + "first_worker_start": "2025-12-23T09:34:01.323445104Z", + "last_worker_end": "2025-12-23T09:34:01.389871Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:01.323424003Z", + "start_time": "2025-12-23T09:34:01.323482106Z", + "end_time": "2025-12-23T09:34:01.323525907Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:01.323546Z", + "start_time": "2025-12-23T09:34:01.323675Z", + "end_time": "2025-12-23T09:34:01.389871Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:01.323469405Z", + "start_time": "2025-12-23T09:34:01.323533408Z", + "end_time": "2025-12-23T09:34:01.32360551Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:01.323377701Z", + "start_time": "2025-12-23T09:34:01.323445104Z", + "end_time": "2025-12-23T09:34:01.323471505Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + 
"median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2791, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/003a70daf9f99e871ba06482726176d7776d0c07.json b/data/output/003a70daf9f99e871ba06482726176d7776d0c07.json new file mode 100644 index 0000000..4fbd518 --- /dev/null +++ b/data/output/003a70daf9f99e871ba06482726176d7776d0c07.json @@ -0,0 +1,354 @@ +{ + "file_name": "003a70daf9f99e871ba06482726176d7776d0c07.txt", + "total_words": 856, + "top_n_words": [ + { + "word": "the", + "count": 52 + }, + { + "word": "and", + "count": 25 + }, + { + "word": "bush", + "count": 22 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "a", + "count": 16 + }, + { + "word": "he", + "count": 16 + }, + { + "word": "said", + "count": 16 + }, + { + "word": "that", + "count": 15 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "s", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "\"George W.", + "length": 10 + }, + { + "text": "We gave it our all.", + "length": 19 + }, + { + "text": "And I'm a content man.", + "length": 22 + }, + { + "text": "Bush Presidential Center.", + "length": 25 + }, + { + "text": "Bush is responsible for that.", + "length": 29 + }, + { + "text": "Bush chose to do,\" Obama said.", + "length": 30 + }, + { + "text": "\"That's what President George W.", + "length": 32 + }, + { + "text": "I didn't compromise 
my principles.", + "length": 34 + }, + { + "text": "Made the best judgment calls I could.", + "length": 37 + }, + { + "text": "By the numbers: Presidential libraries .", + "length": 40 + }, + { + "text": "Bush Institute, a public policy institute.", + "length": 42 + }, + { + "text": "Bush kept his word and acted, Carter said.", + "length": 42 + }, + { + "text": "\"This is a Texas-sized party,\" Obama said.", + "length": 42 + }, + { + "text": "And I am excited about what we're going to do here.", + "length": 51 + }, + { + "text": "Dallas (CNN) -- Some may disagree whether George W.", + "length": 51 + }, + { + "text": "A glimpse at a White House before everything changed .", + "length": 54 + }, + { + "text": "CNN's John King and Brianna Keilar contributed from Dallas.", + "length": 59 + }, + { + "text": "\"As president, I tried to act on these principles every day.", + "length": 60 + }, + { + "text": "It wasn't always easy and certainly wasn't always popular ...", + "length": 61 + }, + { + "text": "CNN's Mariano Castillo wrote and reported this story in Atlanta.", + "length": 64 + }, + { + "text": "It is a rare and special occurrence when the five gather, Obama said.", + "length": 69 + }, + { + "text": "And they show how the September 11 terrorist attacks changed everything.", + "length": 72 + }, + { + "text": "\"I know this: that Laura and I gave the presidency eight years of our life.", + "length": 75 + }, + { + "text": "\"I'm more concerned about being an effective person for the rest of my life.", + "length": 76 + }, + { + "text": "In addition to the library and museum, the Presidential Center includes the George W.", + "length": 85 + }, + { + "text": "History will show, he said at the dedication, that he always stuck by his convictions.", + "length": 86 + }, + { + "text": "If he could make the decision for Jeb, he would tell him to run for president, Bush said.", + "length": 89 + }, + { + "text": "\" The center's library and museum take visitors through the 
turning points of Bush's two terms.", + "length": 95 + }, + { + "text": "\"When all the former living presidents are all together, it is a special day for our democracy.", + "length": 95 + }, + { + "text": "\"There are other people out there that are very qualified, and we've had enough Bushes,\" she said.", + "length": 98 + }, + { + "text": "\"You know, I'm really not that concerned about why people did what during my presidency,\" he said.", + "length": 98 + }, + { + "text": "\" The last time the five living presidents were together was right before President Obama took office.", + "length": 102 + }, + { + "text": "\" As the son of another former president, Bush said he wouldn't mind seeing his brother Jeb Bush run for the highest office.", + "length": 124 + }, + { + "text": "The first exhibits recall the 43rd president's initial priorities on education, faith-based community initiatives and tax cuts.", + "length": 127 + }, + { + "text": "\"In January of 2005, there was a peace treaty between north and south Sudan that ended a war that had been going on for 20 years,\" Carter said.", + "length": 143 + }, + { + "text": "but when future generations come to this library to study this administration, they're going to find out that we stayed true to our convictions.", + "length": 144 + }, + { + "text": "\"A free society thrives when neighbors help neighbors and the strong protect the weak and public policies promote private compassion,\" Bush said.", + "length": 145 + }, + { + "text": "Bush became a little emotional as he closed his speech: \"Whatever challenges come before us, I will always believe our nation's best days lie ahead.", + "length": 148 + }, + { + "text": "\"My deepest conviction, the guiding principle of the administration, is that the United States of America must strive to expand the reach of freedom.", + "length": 149 + }, + { + "text": "\" Carter told a story of how he asked, on Bush's inauguration day, for a meeting to talk about a civil war in 
Sudan that was entering its second decade.", + "length": 152 + }, + { + "text": "\" The presidents on the stage definitely differed on many policy matters, but they all share one quality, Obama said: They did what they believe is right.", + "length": 154 + }, + { + "text": "Bush was a \"uniter, not a divider,\" as he liked to say, but he did get all five living presidents together for the dedication of his presidential library.", + "length": 154 + }, + { + "text": "At Thursday's event in Dallas, Democratic former Presidents Jimmy Carter and Bill Clinton praised Bush for his initiatives in Africa, and Bush defended his record.", + "length": 163 + }, + { + "text": "\" Bush has said he is aware that the opening of his presidential library would reopen debates over the Iraq War and the policies he pursued after the September 11 terrorist attacks.", + "length": 181 + }, + { + "text": "But in an interview Thursday on NBC's \"Today,\" former first lady Barbara Bush said that while Jeb Bush is able to do the job, she would like to see other families in the White House.", + "length": 182 + }, + { + "text": "\"The political winds blow left and right, polls rise and fall, supporters come and go, but in the end, leaders are defined by the convictions they hold,\" Bush said at the ceremony for the George W.", + "length": 197 + }, + { + "text": "In an interview with CNN's John King, the former president said he knows that the center's dedication will rekindle the debate about his presidency, and he conceded the library is in part an effort by him and supporters to influence history's verdict.", + "length": 251 + }, + { + "text": "But he predicted visitors would find it \"more objective\" than they might have imagined, and he showed little interest in revisiting flashpoints like Iraq, Hurricane Katrina or the 2008 financial crisis, or the scorn with which many look back at the Bush presidency.", + "length": 265 + }, + { + "text": "Bush 43: 'History will ultimately judge' At one 
exhibit, the bright red dress that first lady Laura Bush wore to her husband's first state dinner, just six days before 9/11, stands in contrast to the next, most talked-about artifact in the museum: the twisted hulk of two beams from the World Trade Center.", + "length": 306 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.523725688457489 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:01.823651263Z", + "first_section_created": "2025-12-23T09:34:01.823936474Z", + "last_section_published": "2025-12-23T09:34:01.824240686Z", + "all_results_received": "2025-12-23T09:34:01.890108962Z", + "output_generated": "2025-12-23T09:34:01.89030767Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:01.823936474Z", + "publish_time": "2025-12-23T09:34:01.824240686Z", + "first_worker_start": "2025-12-23T09:34:01.824756706Z", + "last_worker_end": "2025-12-23T09:34:01.889175Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:01.824677303Z", + "start_time": "2025-12-23T09:34:01.824777907Z", + "end_time": "2025-12-23T09:34:01.824890812Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:01.82498Z", + "start_time": "2025-12-23T09:34:01.82517Z", + "end_time": "2025-12-23T09:34:01.889175Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:01.824731805Z", + "start_time": "2025-12-23T09:34:01.82484781Z", + "end_time": "2025-12-23T09:34:01.825010316Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": 
"2025-12-23T09:34:01.824659603Z", + "start_time": "2025-12-23T09:34:01.824756706Z", + "end_time": "2025-12-23T09:34:01.824801708Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4871, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/003a9ae0400c5fdde8cc1c7f273c06d99b4d9958.json b/data/output/003a9ae0400c5fdde8cc1c7f273c06d99b4d9958.json new file mode 100644 index 0000000..9e602e2 --- /dev/null +++ b/data/output/003a9ae0400c5fdde8cc1c7f273c06d99b4d9958.json @@ -0,0 +1,656 @@ +{ + "file_name": "003a9ae0400c5fdde8cc1c7f273c06d99b4d9958.txt", + "total_words": 1253, + "top_n_words": [ + { + "word": "the", + "count": 70 + }, + { + "word": "and", + "count": 48 + }, + { + "word": "a", + "count": 28 + }, + { + "word": "in", + "count": 27 + }, + { + "word": "of", + "count": 26 + }, + { + "word": "hotel", + 
"count": 24 + }, + { + "word": "to", + "count": 19 + }, + { + "word": "it", + "count": 17 + }, + { + "word": "is", + "count": 15 + }, + { + "word": "london", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "8.", + "length": 2 + }, + { + "text": "7.", + "length": 2 + }, + { + "text": "5.", + "length": 2 + }, + { + "text": "9.", + "length": 2 + }, + { + "text": "6.", + "length": 2 + }, + { + "text": "4.", + "length": 2 + }, + { + "text": "4.", + "length": 2 + }, + { + "text": "5.", + "length": 2 + }, + { + "text": "6.", + "length": 2 + }, + { + "text": "3.", + "length": 2 + }, + { + "text": "8.", + "length": 2 + }, + { + "text": "1.", + "length": 2 + }, + { + "text": "3.", + "length": 2 + }, + { + "text": "4.", + "length": 2 + }, + { + "text": "2.", + "length": 2 + }, + { + "text": "1.", + "length": 2 + }, + { + "text": "2.", + "length": 2 + }, + { + "text": "3.", + "length": 2 + }, + { + "text": "4.", + "length": 2 + }, + { + "text": "7.", + "length": 2 + }, + { + "text": "2.", + "length": 2 + }, + { + "text": "2.", + "length": 2 + }, + { + "text": "1.", + "length": 2 + }, + { + "text": "5.", + "length": 2 + }, + { + "text": "1.", + "length": 2 + }, + { + "text": "10.", + "length": 3 + }, + { + "text": "10.", + "length": 3 + }, + { + "text": "London .", + "length": 8 + }, + { + "text": "THE BEST .", + "length": 10 + }, + { + "text": "What views!", + "length": 11 + }, + { + "text": "ME, Ibiza .", + "length": 11 + }, + { + "text": ") ME, Ibiza .", + "length": 14 + }, + { + "text": "SLS, Las Vegas .", + "length": 16 + }, + { + "text": ") ME, Mallorca .", + "length": 17 + }, + { + "text": "Ham Yard, London .", + "length": 18 + }, + { + "text": "Beaumont, London .", + "length": 18 + }, + { + "text": ") SLS, Las Vegas .", + "length": 19 + }, + { + "text": "Portrait, Florence .", + "length": 20 + }, + { + "text": ") Peninsula, Paris .", + "length": 21 + }, + { + "text": ") Ham Yard, London .", + "length": 21 + }, + { + "text": ") Beaumont, London .", + 
"length": 21 + }, + { + "text": ") Shangri-La, London .", + "length": 22 + }, + { + "text": "Maalifushi, Maldives .", + "length": 22 + }, + { + "text": ") Raffles, Istanbul .", + "length": 22 + }, + { + "text": "Park Hyatt, New York .", + "length": 22 + }, + { + "text": "Four Seasons, Moscow .", + "length": 22 + }, + { + "text": ") Portrait, Florence .", + "length": 23 + }, + { + "text": ") Maalifushi, Maldives .", + "length": 25 + }, + { + "text": "Loews Regency, New York .", + "length": 25 + }, + { + "text": ") Four Seasons, Moscow .", + "length": 25 + }, + { + "text": ") Park Hyatt, New York .", + "length": 25 + }, + { + "text": ") Waldorf Astoria, Dubai .", + "length": 27 + }, + { + "text": ") Loews Regency, New York .", + "length": 28 + }, + { + "text": ") Cromlix House, Scotland .", + "length": 28 + }, + { + "text": ") Cape Weligama, Sri Lanka .", + "length": 29 + }, + { + "text": ") Waldorf Astoria, Amsterdam .", + "length": 31 + }, + { + "text": ") One and Only, Hayman Island .", + "length": 32 + }, + { + "text": "But they rarely put a foot wrong.", + "length": 33 + }, + { + "text": ") The Brando, French Polynesia .", + "length": 33 + }, + { + "text": "' The Club Lounge of The Beaumont.", + "length": 34 + }, + { + "text": "The ME brand disappoints yet again.", + "length": 35 + }, + { + "text": "And LTI loved the huge rooms and suites.", + "length": 40 + }, + { + "text": ") Four Seasons, Dubai (Jumeirah Beach) 9.", + "length": 42 + }, + { + "text": "And experts concluded that 'perfect it is'.", + "length": 43 + }, + { + "text": "They have their work cut out on this occasion.", + "length": 46 + }, + { + "text": "The experts love the lighter, cleaner lines of the décor.", + "length": 58 + }, + { + "text": "And when you can see them, you know they can see you too...", + "length": 59 + }, + { + "text": "' So, who else got it right this year and who got it wrong?", + "length": 60 + }, + { + "text": "*According to research by LTI (Luxury Travel Intelligence) 3.", 
+ "length": 62 + }, + { + "text": "We have real issues here with poor management, service and food.", + "length": 64 + }, + { + "text": "But this one sadly falls well short of their usual high standards.", + "length": 66 + }, + { + "text": "Here we give you the best and worst new luxury hotels of 2014.....", + "length": 66 + }, + { + "text": "This luxury boutique hotel has unbeatable views of the Ponte Vecchio.", + "length": 69 + }, + { + "text": "*According to research by LTI (Luxury Travel Intelligence) THE WORST .", + "length": 70 + }, + { + "text": "The hotel came first in the list of best new luxury hotel openings of 2014 .", + "length": 76 + }, + { + "text": "We are long-standing fans of the wonderful hotels created by Tim and Kit Kemp.", + "length": 78 + }, + { + "text": "In time it could blossom – and if anyone can achieve this it is the Kemp duo.", + "length": 79 + }, + { + "text": "Here, they have created a wonderful, warm and luxurious hotel in a stunning location.", + "length": 85 + }, + { + "text": "It has a feel of a pop up and appears unfinished and lacking in warmth and character.", + "length": 85 + }, + { + "text": "The overall exterior look is very corporate and jars with the unique character of Soho.", + "length": 87 + }, + { + "text": "Portrait in Florence: This luxury boutique hotel has unbeatable views of the Ponte Vecchi .", + "length": 91 + }, + { + "text": "The Colony Grill Room is outstanding and deservedly one of the hottest reservations in town.", + "length": 92 + }, + { + "text": "Entrance Lobby, The Beaumont: Hotel's opening was pushed back to ensure everything was perfect .", + "length": 97 + }, + { + "text": "The worst of the lot: A design fault at Shangri-La means guests can look into neighbouring rooms .", + "length": 98 + }, + { + "text": "Best of the best: The Beaumont in London opened in September and was deemed 'perfect' by LTI's experts .", + "length": 104 + }, + { + "text": "This iconic property still offers a superb location 
and some of the largest rooms and suites in the city.", + "length": 105 + }, + { + "text": "And we are also surprised at how such a major financial investment could be followed by such low standards.", + "length": 107 + }, + { + "text": "Third place: Lowes Regency Hotel had a lengthy refurbishment but is an iconic property in a superb location .", + "length": 110 + }, + { + "text": "Fourth worst: Despite having much to like, experts thought Park Hyatt was noisy from 'insufficient glazing' 5.", + "length": 110 + }, + { + "text": "LTI said it was no surprise that the hotel opening was pushed back several times, so that everything would be perfect.", + "length": 118 + }, + { + "text": "Andy Murray's Cromlix hotel was praised for its country atmosphere and charming local staff, putting it in 10th place .", + "length": 119 + }, + { + "text": "As Four Seasons continue their prolific global expansion, it would be easy for the brand (and LTI) to become a little blasé.", + "length": 125 + }, + { + "text": "They said: 'In fact it feels like it has been there forever, and the Art Deco inspired rooms and suites are warm and welcoming.", + "length": 127 + }, + { + "text": "SLS Miami failed to impress us, but after three days at SLS Las Vegas we would have returned to the Miami location in a heartbeat.", + "length": 130 + }, + { + "text": "Couture shoemakers Salvatore Ferragamo own it – and the family have poured heart, soul and money into what is a wonderful property.", + "length": 133 + }, + { + "text": "Traffic and construction noise, from insufficient glazing, is a major design flaw at this 24-hour location – with huge implications.", + "length": 134 + }, + { + "text": "Wonderful island chic accommodations, a great spa and plentiful in/on water activities make this our new favourite Maldives destination.", + "length": 136 + }, + { + "text": "OK, so this is not really a new hotel as such – but the lengthy and well-executed refurbishment has revealed a property that is new in 
many regards.", + "length": 150 + }, + { + "text": "We're not talking about the amazing London skyline stretched out below you from your room – but, rather, those direct views into neighbouring rooms.", + "length": 150 + }, + { + "text": "It opened to great fanfare promising spectacular views across London, but the first guests of the Shangri-La in The Shard got rather more than they bargained for.", + "length": 162 + }, + { + "text": "This disastrous design fault has already been well documented by the global media, but we have witnessed it first hand on two occasions and it is very disconcerting.", + "length": 165 + }, + { + "text": "Opened by Chris Corbin and Jeremy King, famed for their dining empire including London's The Wolseley, the Beaumont was their long-awaited foray into the hotel world.", + "length": 166 + }, + { + "text": "It’s a shame that they couldn’t get the spa (set to be one of the best in Moscow) ready in time for the opening – an oversight, which marks them down in our books.", + "length": 169 + }, + { + "text": "The global members organisation put together its ultimate list of the best and worst hotel openings of 2014, with London accommodation taking the top spot in both lists.", + "length": 169 + }, + { + "text": "There is much to like about this property, but after three visits we have one important issue, which seems to have gone unaddressed (so far) – and that is noise levels.", + "length": 170 + }, + { + "text": "A little isolated, compared to most of the top resorts, and on a tiny piece of atoll real estate, just 800 metres by 200 metres, thereby delivering a real Maldives experience.", + "length": 175 + }, + { + "text": "A design fault appeared to give the ultimate 'peeping Tom' view into other rooms, with guests complaining about unwanted sights and the Shangri-La hastily erecting some modesty blinds.", + "length": 184 + }, + { + "text": "So perhaps it is unsurprising that the luxury hotel on The Shard's 34th to 52nd floors has been 
voted the worst hotel opening of 2014 by expert reviewers at Luxury Travel Intelligence.", + "length": 184 + }, + { + "text": "There is so much wrong here – including poor location, poor management and many of the hotel’s dining and entertainment options failing to draw the crowds, meaning shutdowns (or limited opening hours).", + "length": 205 + }, + { + "text": "Fourth: Four seasons in Moscow 'rarely put a foot a wrong' and have created a 'warm and luxurious hotel' Fifth: Maalifushi in Maldives has 'chic accommodations' and a 'great spa' as well as water activities .", + "length": 208 + }, + { + "text": "Second worst: SLS  was deemed to have a 'poor location, poor management' and fails to 'draw the crowds' Many of SLS's dining and entertainment options failed to 'draw the crowds' meaning limited opening hours .", + "length": 211 + }, + { + "text": "Andy Murray's Cromlix House was also praised by the organisation, taking 10th place in the best hotel openings list and praised for creating 'a stand out country house hotel experience, enhanced by warm and caring staff, who are mostly locals.", + "length": 243 + }, + { + "text": "The hotel’s solution has been to fit blinds, but then you are in just another hotel room, which completely erodes the whole purpose of staying here, in one the tallest building in Europe, as room décor and size is only on par with the average London 4/5 star hotel.", + "length": 268 + }, + { + "text": "While the Shangri-La at The Shard was voted worst, with the capital's boutique hotel Han Yard taking third place, the independent Beaumont hotel in London's Mayfair was praised for its Art Deco-inspired rooms and perfect service and took first place for the best hotel opening of the year.", + "length": 289 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7040784657001495 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:02.325415583Z", + "first_section_created": 
"2025-12-23T09:34:02.325688094Z", + "last_section_published": "2025-12-23T09:34:02.326180613Z", + "all_results_received": "2025-12-23T09:34:02.416722053Z", + "output_generated": "2025-12-23T09:34:02.416981564Z", + "total_processing_time_ms": 91, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 90, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:02.325688094Z", + "publish_time": "2025-12-23T09:34:02.325971105Z", + "first_worker_start": "2025-12-23T09:34:02.326476125Z", + "last_worker_end": "2025-12-23T09:34:02.415848Z", + "total_journey_time_ms": 90, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:02.326402822Z", + "start_time": "2025-12-23T09:34:02.326476125Z", + "end_time": "2025-12-23T09:34:02.326600329Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:02.327216Z", + "start_time": "2025-12-23T09:34:02.32735Z", + "end_time": "2025-12-23T09:34:02.415848Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 88 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:02.326514126Z", + "start_time": "2025-12-23T09:34:02.326582029Z", + "end_time": "2025-12-23T09:34:02.326719734Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:02.326404722Z", + "start_time": "2025-12-23T09:34:02.326501126Z", + "end_time": "2025-12-23T09:34:02.326559928Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:02.326055108Z", + "publish_time": "2025-12-23T09:34:02.326180613Z", + "first_worker_start": "2025-12-23T09:34:02.326530527Z", + "last_worker_end": "2025-12-23T09:34:02.402862Z", + "total_journey_time_ms": 76, + "worker_timings": { + "sentencesort": { + 
"worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:02.326569228Z", + "start_time": "2025-12-23T09:34:02.326600329Z", + "end_time": "2025-12-23T09:34:02.326640631Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:02.327002Z", + "start_time": "2025-12-23T09:34:02.327141Z", + "end_time": "2025-12-23T09:34:02.402862Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 75 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:02.326548427Z", + "start_time": "2025-12-23T09:34:02.326579929Z", + "end_time": "2025-12-23T09:34:02.326637031Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:02.326475725Z", + "start_time": "2025-12-23T09:34:02.326530527Z", + "end_time": "2025-12-23T09:34:02.326556628Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 163, + "min_processing_ms": 75, + "max_processing_ms": 88, + "avg_processing_ms": 81, + "median_processing_ms": 88, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3683, + "slowest_section_id": 0, + "slowest_section_time_ms": 90 + } +} diff --git a/data/output/003ab07425031001d72b80d3ea80d59fa9a07ccd.json b/data/output/003ab07425031001d72b80d3ea80d59fa9a07ccd.json new file mode 100644 index 0000000..2b5add3 --- /dev/null +++ b/data/output/003ab07425031001d72b80d3ea80d59fa9a07ccd.json @@ -0,0 +1,496 @@ +{ + "file_name": "003ab07425031001d72b80d3ea80d59fa9a07ccd.txt", + "total_words": 1267, + "top_n_words": [ + { + "word": "the", + "count": 67 + }, + { + "word": "to", + "count": 31 + }, + { + "word": "a", + "count": 28 + }, + { + "word": "of", + "count": 25 + }, + { + "word": "and", + "count": 22 + }, + { + "word": "he", + "count": 17 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "penalty", + "count": 17 + }, + { + "word": "was", + "count": 17 + }, + { + "word": "at", + "count": 15 + } + ], + "sorted_sentences": [ + { + "text": "So he did.", + "length": 10 + }, + { + "text": "He didn't.", + "length": 10 + }, + { + "text": "Att: 34,739 .", + "length": 13 + }, + { + "text": "And he missed.", + "length": 14 + }, + { + "text": "This was farce.", + "length": 15 + }, + { + "text": "Booked: Morrison, Dorrans.", + "length": 26 + }, + { + "text": "Of course I am disappointed.", + "length": 28 + }, + { + "text": "Booked: Barry, Oviedo, Besic.", + "length": 29 + }, + { + "text": "Everton: Robles 7, Coleman 6.", + "length": 30 + }, + { + "text": "Despicable, Gary Neville called it.", + "length": 35 + }, + { + "text": "' Trouble was, there was no dynamism.", + "length": 37 + }, + { + "text": "Ref: Michael Oliver (Northumberland).", + "length": 38 + }, + { + "text": "We are a team that is very much together.", + "length": 41 + }, + { + "text": "They headed home with their spirits lifted.", + "length": 43 + }, + { + "text": "We needed a dynamic player in the second half.", + "length": 46 + }, + 
{ + "text": "Subs Not Used: Myhill, Pocognoli, Dawson, Samaras.", + "length": 50 + }, + { + "text": "tired in recent weeks, in desperate need of a tonic.", + "length": 52 + }, + { + "text": "West Brom: Foster 8, Wisdom 7, McAuley 7, Lescott 6.", + "length": 52 + }, + { + "text": "And that puts Mirallas' miss into sharper perspective.", + "length": 54 + }, + { + "text": "It all started when Seamus Coleman hoisted in a cross.", + "length": 54 + }, + { + "text": "The disappointment is that we can't finish the penalty.", + "length": 55 + }, + { + "text": "Martinez, by contrast, left with his anxieties increasing.", + "length": 58 + }, + { + "text": "That gives you an idea of what was riding on this encounter.", + "length": 60 + }, + { + "text": "Or – more accurately – in desperate need of three points.", + "length": 61 + }, + { + "text": "Subs Not Used: Hibbert, Garbutt, Alcaraz, McAleny, Griffiths.", + "length": 61 + }, + { + "text": "5, Stones 7, Jagielka 7, Baines 7, Besic 8 (Kone 79), Barry 6.", + "length": 62 + }, + { + "text": "5, Naismith 6, Barkley 6, Mirallas 6 (Oviedo 46, 6), Lukaku 6.", + "length": 62 + }, + { + "text": "And the growls and groans began sweeping around Goodison Park.", + "length": 62 + }, + { + "text": "If we had played this game 10 times, we would have had nine wins.", + "length": 65 + }, + { + "text": "'The issue is we have not scored the penalty and we have not scored.", + "length": 68 + }, + { + "text": "'We are making an issue about a penalty taker who misses the penalty.", + "length": 69 + }, + { + "text": "In a game where there was no margin for error, this was a huge mistake.", + "length": 71 + }, + { + "text": "They knew chances were going to be scarce, that failures would be costly.", + "length": 73 + }, + { + "text": "Kevin Mirallas scored the first penalty at West Ham in the penalty shootout.", + "length": 76 + }, + { + "text": "Steven Naismith also stepped in, imploring Mirallas to 'give it to Bainesy'.", + "length": 76 + }, + { 
+ "text": "Martinez may have tried to defuse it but do not underestimate the ramifications.", + "length": 80 + }, + { + "text": "'It makes it a big issue but if he hits the back of the net, nothing would happen.", + "length": 82 + }, + { + "text": "Romelu Lukaku chested the ball down and, as it bounced up, Joleon Lescott handled.", + "length": 82 + }, + { + "text": "He was not 100 per cent, he was feeling his hamstring and straight after the penalty.", + "length": 85 + }, + { + "text": "Baines had the final say, repeatedly asking Mirallas whether he was sure he wanted to take it.", + "length": 94 + }, + { + "text": "Mirallas was substituted at half-time by Everton manager Roberto Martinez and replaced by Bryan Oviedo .", + "length": 104 + }, + { + "text": "Mirallas and Leighton Baines debated over who should take the penalty as a result of Lescott's handball .", + "length": 105 + }, + { + "text": "As the ball cannoned into the advertising hoardings, groans swept the stadium and Martinez looked bewildered.", + "length": 109 + }, + { + "text": "Baines appeared to back away and let Mirallas take the penalty as he patted the Belgian forward on the head .", + "length": 109 + }, + { + "text": "Jamie Carragher said it was 'selfish'; Roberto Martinez's description of choice, meanwhile, was 'unfortunate'.", + "length": 110 + }, + { + "text": "Referee Michael Oliver had no hesitation pointing to the penalty spot, waving away the token West Brom appeals.", + "length": 111 + }, + { + "text": "Putting himself ahead of his team, he has cranked up the tension and pressure on Everton's floundering campaign.", + "length": 112 + }, + { + "text": "Kevin Mirallas missed a penalty as Everton and West Brom played out a goalless draw at Goodison Park on Monday .", + "length": 112 + }, + { + "text": "A message on the big screen at Goodison Park explaining that a new Rocky movie was being filmed during half-time .", + "length": 114 + }, + { + "text": "West Brom, for whom Claudio Yacob, 
Gareth McAuley and Ben Foster were superb, stood firm and deserved their point.", + "length": 114 + }, + { + "text": "5, Baird 6, Brunt 6, Yacob 6, Morrison 6 (Sessegnon 65), Gardner 6, Berahino 5 (Dorrans 70), Anichebe 5 (Ideye 79).", + "length": 115 + }, + { + "text": "A film crew on the pitch at Goodison Park at half-time to film a new Rocky movie starring actor Sylvester Stallone .", + "length": 116 + }, + { + "text": "West Brom defender Joleon Lescott handled the ball to give Everton a penalty just before half-time at Goodison Park .", + "length": 117 + }, + { + "text": "Mirallas watches his penalty clip the outside of the post beyond the outstretched arm of West Brom goalkeeper Foster .", + "length": 118 + }, + { + "text": "Usually in such circumstances, Leighton Baines steps up, picks his spot then smashes his drive unerringly into the net.", + "length": 119 + }, + { + "text": "West Brom striker Saido Berahino battles for the ball with Everton's Muhamed Besic during their clash at Goodison Park .", + "length": 120 + }, + { + "text": "Lukaku tries to break the deadlock in the second half for Everton, but watches his shot fly wide of Foster's near post .", + "length": 120 + }, + { + "text": "The Belgian dragged his penalty wide of Ben Foster's goal as the game remained goalless going into the half-time interval .", + "length": 123 + }, + { + "text": "'Leighton Baines is the No 1 penalty taker, but at that point I would have been happy to allow him and Kevin to discuss that.", + "length": 125 + }, + { + "text": "Roberto Martinez watched his side struggle to a draw to extend their winless run to six games on Monday against Tony Pulis' men .", + "length": 129 + }, + { + "text": "He swept his right-footed wide of the post and even if it had been on target, Foster had dived the right way and would have got there.", + "length": 134 + }, + { + "text": "A photograph of actor Stallone appears on a big screen inside the stadium as preparations are made to film part of 
the new Rocky movie .", + "length": 136 + }, + { + "text": "This time, however, Mirallas was set on the glory, ignoring Lukaku's attempts to take the ball off him and give it to the England international.", + "length": 144 + }, + { + "text": "'At that moment he felt confident and he wanted to take it,' said Martinez, who revealed Mirallas' substitution was down to him suffering a hamstring problem.", + "length": 158 + }, + { + "text": "Muhamed Besic covered a lot of ground (as his heat map above shows) before being replaced in the 79th minute by Arouna Kone - CLICK HERE FOR MATCH ZONE FROM GOODISON PARK .", + "length": 172 + }, + { + "text": "Whatever adjective you wanted to go for, there was no disputing the impact of Kevin Mirallas' actions in the 43rd minute of this unremarkable Barclays Premier League encounter.", + "length": 176 + }, + { + "text": "Everton versus West Bromwich Albion was never a fixture that looked like entering the vaults of football gold but it ended up producing one of the season's most controversial moments.", + "length": 183 + }, + { + "text": "'If Leighton Baines wants to take the penalty, he takes the penalty,' Martinez argued after a 0-0 draw that stretches his team's wretched run to one win in 13 games in all competitions.", + "length": 185 + }, + { + "text": "Cameras were on the pitch at half-time to shoot some crowd scenes for his latest Rocky-based project, 'Creed', but try as they did to rouse themselves, the home supporters were baffled by what had gone on.", + "length": 205 + }, + { + "text": "Baines' record is 15 successful kicks from 16 and missing at Old Trafford in October clearly had no lasting impact, as the following month he successfully converted against Sunderland at the Stadium of Light.", + "length": 208 + }, + { + "text": "' So much for the Hollywood ending: they were filming for Sylvester Stallone's latest blockbuster at Goodison Park on Monday night but, instead, they had to sit through a raspberry; not even a 
message from Stallone, broadcast on the big screens, could lift the mood.", + "length": 266 + }, + { + "text": "Though Martinez has experienced relegation with Wigan Athletic, it is possible to argue the last two months have been the most testing of his managerial career as Everton have plummeted from being Champions League dark horses to the fringes of the relegation skirmish.", + "length": 268 + }, + { + "text": "That proved to be Mirallas' last contribution, as he never resurfaced after the break, his place being taken by Bryan Oviedo; Martinez had been answering questions about Mirallas' future before this game, in relation to a new contract, but this hardly did the Belgian any good.", + "length": 277 + }, + { + "text": "Looking at the squad they have – a number of promising young internationals allied with experienced old campaigners – Everton should not be rubbing shoulders with those clubs fighting to stay above water but that is what happens when clubs endure a run of one win in 12 games.", + "length": 280 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7206336557865143 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:02.826966295Z", + "first_section_created": "2025-12-23T09:34:02.828593561Z", + "last_section_published": "2025-12-23T09:34:02.828955475Z", + "all_results_received": "2025-12-23T09:34:02.913962099Z", + "output_generated": "2025-12-23T09:34:02.914533322Z", + "total_processing_time_ms": 87, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 85, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:02.828593561Z", + "publish_time": "2025-12-23T09:34:02.828859871Z", + "first_worker_start": "2025-12-23T09:34:02.829352291Z", + "last_worker_end": "2025-12-23T09:34:02.912393Z", + "total_journey_time_ms": 83, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": 
"2025-12-23T09:34:02.829518498Z", + "start_time": "2025-12-23T09:34:02.829613902Z", + "end_time": "2025-12-23T09:34:02.829706305Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:02.829871Z", + "start_time": "2025-12-23T09:34:02.829997Z", + "end_time": "2025-12-23T09:34:02.912393Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 82 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:02.829461596Z", + "start_time": "2025-12-23T09:34:02.829532398Z", + "end_time": "2025-12-23T09:34:02.829638303Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:02.829279888Z", + "start_time": "2025-12-23T09:34:02.829352291Z", + "end_time": "2025-12-23T09:34:02.829400993Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:02.828896373Z", + "publish_time": "2025-12-23T09:34:02.828955475Z", + "first_worker_start": "2025-12-23T09:34:02.829620202Z", + "last_worker_end": "2025-12-23T09:34:02.91143Z", + "total_journey_time_ms": 82, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:02.829615202Z", + "start_time": "2025-12-23T09:34:02.829663104Z", + "end_time": "2025-12-23T09:34:02.829710106Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:02.829826Z", + "start_time": "2025-12-23T09:34:02.82996Z", + "end_time": "2025-12-23T09:34:02.91143Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 81 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:02.8295745Z", + "start_time": "2025-12-23T09:34:02.829620202Z", + "end_time": "2025-12-23T09:34:02.82981721Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { 
+ "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:02.829627902Z", + "start_time": "2025-12-23T09:34:02.829663104Z", + "end_time": "2025-12-23T09:34:02.829682604Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 163, + "min_processing_ms": 81, + "max_processing_ms": 82, + "avg_processing_ms": 81, + "median_processing_ms": 82, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3669, + "slowest_section_id": 0, + "slowest_section_time_ms": 83 + } +} diff --git a/data/output/003abce1fe4c5754917abc4f4faa305cf577c1b2.json b/data/output/003abce1fe4c5754917abc4f4faa305cf577c1b2.json new file mode 100644 index 0000000..9f98c2e --- /dev/null +++ b/data/output/003abce1fe4c5754917abc4f4faa305cf577c1b2.json @@ -0,0 +1,640 @@ +{ + "file_name": "003abce1fe4c5754917abc4f4faa305cf577c1b2.txt", + "total_words": 1316, + "top_n_words": [ + { + "word": "the", + "count": 51 + }, + { + "word": "to", + "count": 48 + }, + { + "word": "was", + "count": 35 + }, + { + "word": "and", + "count": 34 + }, + { + 
"word": "i", + "count": 34 + }, + { + "word": "he", + "count": 29 + }, + { + "word": "his", + "count": 22 + }, + { + "word": "her", + "count": 20 + }, + { + "word": "for", + "count": 19 + }, + { + "word": "him", + "count": 19 + } + ], + "sorted_sentences": [ + { + "text": "I .", + "length": 3 + }, + { + "text": "I .", + "length": 3 + }, + { + "text": "all.", + "length": 4 + }, + { + "text": "We .", + "length": 4 + }, + { + "text": "He may .", + "length": 8 + }, + { + "text": "my tummy.", + "length": 9 + }, + { + "text": "pregnancy.", + "length": 10 + }, + { + "text": "moment of my life.", + "length": 18 + }, + { + "text": "It was up to him .", + "length": 18 + }, + { + "text": "nine weeks of her .", + "length": 19 + }, + { + "text": "wouldn’t help him.", + "length": 20 + }, + { + "text": "- Mother Katyia Rowe .", + "length": 22 + }, + { + "text": "consider a termination.", + "length": 23 + }, + { + "text": "left, no matter how short.", + "length": 26 + }, + { + "text": "my life fully to his care.", + "length": 26 + }, + { + "text": "now if he was ready to go.", + "length": 26 + }, + { + "text": "his condition to be assessed.", + "length": 29 + }, + { + "text": "I never had a moment of doubt.", + "length": 30 + }, + { + "text": "‘My son looked utterly perfect.", + "length": 33 + }, + { + "text": "prepare myself fully for his needs.", + "length": 35 + }, + { + "text": "I was more than happy to dedicate .", + "length": 35 + }, + { + "text": "Katyia was told if her son survived .", + "length": 37 + }, + { + "text": "'If he could smile and play and feel .", + "length": 38 + }, + { + "text": "He was delivered after being induced .", + "length": 38 + }, + { + "text": "‘As he grew bigger I could see his .", + "length": 38 + }, + { + "text": "him the best quality of life possible.", + "length": 38 + }, + { + "text": "‘But I never ever thought like that.", + "length": 38 + }, + { + "text": "She said: ‘It was agony and I knew .", + "length": 38 + }, + { + "text": "as a sign 
we had done the right thing.", + "length": 38 + }, + { + "text": "‘Not knowing how long he would live .", + "length": 39 + }, + { + "text": "She added: ‘It didn’t phase me at .", + "length": 39 + }, + { + "text": "'He had already given me the greatest .", + "length": 39 + }, + { + "text": "She says: ‘I was shocked but we had .", + "length": 39 + }, + { + "text": "She says: ‘I was prepared not to be .", + "length": 39 + }, + { + "text": "’ And for Katyia the rewards for her .", + "length": 40 + }, + { + "text": "expected to be anything up to five years.", + "length": 41 + }, + { + "text": "life – the time she spent with her son.", + "length": 41 + }, + { + "text": "‘I researched all his disabilities to .", + "length": 41 + }, + { + "text": "‘Further scans were arranged to asses .", + "length": 41 + }, + { + "text": "that I didn’t know what the future held.", + "length": 42 + }, + { + "text": "Just because his life would be shorter or .", + "length": 43 + }, + { + "text": "inside me I knew I couldn’t end his life.", + "length": 43 + }, + { + "text": "Following further tests, doctors told Miss .", + "length": 44 + }, + { + "text": "’ Because of her son’s disabilities he .", + "length": 44 + }, + { + "text": "held on long enough for us to meet properly.", + "length": 44 + }, + { + "text": "’ She added: ‘I thought I didn’t want .", + "length": 45 + }, + { + "text": "world and I will always be grateful for that.", + "length": 45 + }, + { + "text": "Tragically he died nine hours after his birth .", + "length": 47 + }, + { + "text": "Only the 20-week scan highlighted complications.", + "length": 48 + }, + { + "text": "‘Our first scan at three months was wonderful.", + "length": 48 + }, + { + "text": "She said: ‘It was without doubt the happiest .", + "length": 48 + }, + { + "text": "formed properly and he would be severely disabled.", + "length": 50 + }, + { + "text": "She added: ‘It was a shock but we were thrilled.", + "length": 50 + }, + { + "text": "honour of being his 
mummy for the last nine months.", + "length": 51 + }, + { + "text": "As far as we were concerned everything was perfect.", + "length": 51 + }, + { + "text": "Lucian could have died at anytime in my womb but he .", + "length": 53 + }, + { + "text": "for a severely disabled baby that may not live for long.", + "length": 56 + }, + { + "text": "To donate to charity Sands in Lucian's memory visit here.", + "length": 57 + }, + { + "text": "to know I was doing the right thing by giving him a chance.", + "length": 59 + }, + { + "text": "Shane and I were so excited and looking forward to the birth.", + "length": 61 + }, + { + "text": "different, didn’t mean he didn’t deserve to experience it.", + "length": 62 + }, + { + "text": "meant we were determined to enjoy him for as long as we could.", + "length": 62 + }, + { + "text": "mother as soon as I fell pregnant, that job had started already.", + "length": 64 + }, + { + "text": "little feet and hands prodding through my bump when he wriggled.", + "length": 64 + }, + { + "text": "It was ironic because I had never considered myself particularly .", + "length": 66 + }, + { + "text": "Rowe and her partner of four years that their baby's brain had no .", + "length": 67 + }, + { + "text": "birth he would require 24 -hour care for the duration of his life .", + "length": 67 + }, + { + "text": "didn’t want him given any unnecessary treatment if ultimately it .", + "length": 68 + }, + { + "text": "The couple were offered the chance to terminate the baby at 24-weeks.", + "length": 69 + }, + { + "text": "already decided that after his birth we would let Lucian lead the way.", + "length": 70 + }, + { + "text": "brain abnormalities were so severe they were life limiting we should .", + "length": 70 + }, + { + "text": "As a mother you will do anything for your child and for me I became a .", + "length": 71 + }, + { + "text": "Tragically Lucian, as she named him, died nine hours after he was born.", + "length": 71 + }, + { + "text": 
"hospital and as expected was rushed straight to special baby care for .", + "length": 71 + }, + { + "text": "the extent of his disabilities but when I saw him smiling and playing .", + "length": 71 + }, + { + "text": "not have been born but he was already our son and I took each movement .", + "length": 72 + }, + { + "text": "learned he loved the shower and would kick when I sprayed the water on .", + "length": 72 + }, + { + "text": "some people questioned if it was worth putting myself through all this .", + "length": 72 + }, + { + "text": "We had so many plans for the future and could not wait to meet our baby.", + "length": 72 + }, + { + "text": "when her waters went on October 23rd last year at the Royal Shrewsbury .", + "length": 72 + }, + { + "text": "only had to look at the scan pictures of him enjoying life in the womb .", + "length": 72 + }, + { + "text": "to be a mother but Lucian taught me it is the most wonderful job in the .", + "length": 73 + }, + { + "text": "then despite his disabilities he deserved to enjoy whatever life he had .", + "length": 73 + }, + { + "text": "maternal but now I wanted nothing more than to care for my son and give .", + "length": 73 + }, + { + "text": "taking our baby straight home like all the other new parents, but beyond .", + "length": 74 + }, + { + "text": "couldn’t swallow the amniotic fluid surrounding him meaning Kaytia had .", + "length": 74 + }, + { + "text": "pregnancy were she says the most joyful and fulfilling nine hours of her .", + "length": 74 + }, + { + "text": "When we saw our baby on screen for the first time we fell in love straight away.", + "length": 80 + }, + { + "text": "’ Before his death he was held in his mother's arms and he even met his grandparents.", + "length": 87 + }, + { + "text": "Katyia, 26, a training administrator, said: 'We were devastated to be told our son’s .", + "length": 88 + }, + { + "text": "’ The couple pictured together in the room that was decorated for their baby boy 
Lucian.", + "length": 90 + }, + { + "text": "’ The couple decided to wed when their son was old enough to walk down the aisle with them.", + "length": 93 + }, + { + "text": "‘I would talk to him and play him music because I wanted him to experience as much as possible.", + "length": 97 + }, + { + "text": "‘The love and joy I felt the moment they put Lucian in my arms told me it had all been worth it.", + "length": 98 + }, + { + "text": "‘I was told he would never walk or talk yet the scans showed him constantly wriggling and moving.", + "length": 99 + }, + { + "text": "’ Katyia rushed to his side and finally the son she had nurtured for nine months was placed in her arms.", + "length": 106 + }, + { + "text": "Katyia Rowe who went through the birth of her baby, pictured in a frame, and said she didn't regret her decision .", + "length": 114 + }, + { + "text": "Despite the ordeal, Ms Rowe said she had no regrets going through with the birth as she was able to cuddle her baby son.", + "length": 120 + }, + { + "text": "Katyia Rowe and her partner Shane decided to have the baby despite the complications, saying the child deserved to live .", + "length": 121 + }, + { + "text": "’ But shortly after the birth midwives burst into the delivery suite and warned Katyia her son had just minutes to live.", + "length": 122 + }, + { + "text": "‘As long as he was pain free I vowed to let him enjoy his life both while inside me and outside, no matter how long that be.", + "length": 126 + }, + { + "text": "Katyia Rowe was told her baby's brain had not formed properly and that he would never walk or talk and would need 24-hour care.", + "length": 127 + }, + { + "text": "A mother was unable to abort her severely disabled son despite doctors' warnings after seeing her baby's smile in a 3D scan picture.", + "length": 132 + }, + { + "text": "They were then told the tragic news by experts at Birmingham Children's Hospital that their child would never walk or talk and would need 24-hour 
care.", + "length": 151 + }, + { + "text": "Mother Katyia Rowe gave birth to her severely disabled son Lucian, despite doctors advising her to have an abortion, after seeing a 3D scan of him smiling, pictured .", + "length": 166 + }, + { + "text": "But after seeing real-time moving scans of him smiling, blowing bubbles, kicking and waving his arms she made the heartbreaking decision to go through with the birth.", + "length": 166 + }, + { + "text": "’ Miss Rowe, from Telford, Shropshire was thrilled to discover she was expecting a baby with partner of four years security officer Shane Johnson, 26, in March last year.", + "length": 172 + }, + { + "text": "‘As I watched I knew that while I was carrying him he still had a quality of life and it was my duty as a mother to protect that no matter how long he had left, he deserved to live.", + "length": 183 + }, + { + "text": "She said: ‘Despite all the awful things I was being told, while he was inside me his quality of life looked to be wonderful and no different to any other baby’s, he was a joy to watch.", + "length": 188 + }, + { + "text": "But despite his poor prognosis, being able to watch her son in real time 3D scans during the screening tests, Miss Rowe said she was astonished to see him smiling, blowing bubbles, kicking and waving his arms.", + "length": 209 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5079789012670517 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:03.329722346Z", + "first_section_created": "2025-12-23T09:34:03.331429815Z", + "last_section_published": "2025-12-23T09:34:03.331828131Z", + "all_results_received": "2025-12-23T09:34:03.438337321Z", + "output_generated": "2025-12-23T09:34:03.438538629Z", + "total_processing_time_ms": 108, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 106, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": 
"2025-12-23T09:34:03.331429815Z", + "publish_time": "2025-12-23T09:34:03.331677625Z", + "first_worker_start": "2025-12-23T09:34:03.332325751Z", + "last_worker_end": "2025-12-23T09:34:03.437451Z", + "total_journey_time_ms": 106, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:03.332183545Z", + "start_time": "2025-12-23T09:34:03.332344252Z", + "end_time": "2025-12-23T09:34:03.332467257Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:03.332493Z", + "start_time": "2025-12-23T09:34:03.33263Z", + "end_time": "2025-12-23T09:34:03.437451Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 104 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:03.33229615Z", + "start_time": "2025-12-23T09:34:03.332367953Z", + "end_time": "2025-12-23T09:34:03.332462757Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:03.332248848Z", + "start_time": "2025-12-23T09:34:03.332325751Z", + "end_time": "2025-12-23T09:34:03.332384354Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:03.331738428Z", + "publish_time": "2025-12-23T09:34:03.331828131Z", + "first_worker_start": "2025-12-23T09:34:03.332368753Z", + "last_worker_end": "2025-12-23T09:34:03.426144Z", + "total_journey_time_ms": 94, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:03.332454356Z", + "start_time": "2025-12-23T09:34:03.332496058Z", + "end_time": "2025-12-23T09:34:03.33254906Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:03.332717Z", + "start_time": "2025-12-23T09:34:03.332846Z", + "end_time": 
"2025-12-23T09:34:03.426144Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 93 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:03.332339052Z", + "start_time": "2025-12-23T09:34:03.332396554Z", + "end_time": "2025-12-23T09:34:03.332455856Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:03.332313851Z", + "start_time": "2025-12-23T09:34:03.332368753Z", + "end_time": "2025-12-23T09:34:03.332466657Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 197, + "min_processing_ms": 93, + "max_processing_ms": 104, + "avg_processing_ms": 98, + "median_processing_ms": 104, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3552, + "slowest_section_id": 0, + "slowest_section_time_ms": 106 + } +} diff --git a/data/output/003ae6f81b90eae41831361deae8a5da3705dc4f.json b/data/output/003ae6f81b90eae41831361deae8a5da3705dc4f.json new file mode 100644 index 0000000..0c8ec1a --- /dev/null +++ 
b/data/output/003ae6f81b90eae41831361deae8a5da3705dc4f.json @@ -0,0 +1,358 @@ +{ + "file_name": "003ae6f81b90eae41831361deae8a5da3705dc4f.txt", + "total_words": 861, + "top_n_words": [ + { + "word": "the", + "count": 51 + }, + { + "word": "of", + "count": 21 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "s", + "count": 17 + }, + { + "word": "sebastien", + "count": 13 + }, + { + "word": "from", + "count": 11 + }, + { + "word": "that", + "count": 11 + }, + { + "word": "said", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "Follow Cindy Y.", + "length": 15 + }, + { + "text": "\" from Matt Cyrus.", + "length": 18 + }, + { + "text": "It's not hurting me.", + "length": 20 + }, + { + "text": "Rodriguez on Twitter .", + "length": 22 + }, + { + "text": "It's just your opinion.", + "length": 23 + }, + { + "text": "And the list went on and on.", + "length": 28 + }, + { + "text": "San Antonio supports Sebastien.", + "length": 31 + }, + { + "text": "This is part of the American life.", + "length": 34 + }, + { + "text": "\" This idiot's apparently of color too.", + "length": 39 + }, + { + "text": "I am an American living the American Dream.", + "length": 43 + }, + { + "text": "See how Sebastien is handling the reaction .", + "length": 44 + }, + { + "text": "See the performance by Sebastien De La Cruz .", + "length": 45 + }, + { + "text": "\"I'm a proud American and live in a free country.", + "length": 49 + }, + { + "text": "-- \"Why is a foreigner singing the national anthem.", + "length": 51 + }, + { + "text": "You can't satisfy everyone,\" said de la Cruz to CNN.", + "length": 52 + }, + { + "text": "-- \"Who let this illegal alien sing our national anthem?", + "length": 56 + }, + { + "text": "\"To see people acting this way doesn't make sense anymore.", + "length": 58 + }, + { + "text": "\"Cities like San Antonio are defining a trend of a nation.", + "length": 58 + }, + { + 
"text": "As for Sebastien, he said the racist comments have not fazed him.", + "length": 65 + }, + { + "text": "\"When he was on 'America's Got Talent,' he faced racism there, too.", + "length": 67 + }, + { + "text": "Yet it's also the name of a group of people in a neighboring country.", + "length": 69 + }, + { + "text": "-- \"Why are ppl so upset over a Hispanic singing the national anthem.", + "length": 69 + }, + { + "text": "\" It will take a lot more than some racist tweets to bring Sebastien down.", + "length": 74 + }, + { + "text": "I realize that's San Antonio but that still ain't Mexico\" from Lewie Groh.", + "length": 74 + }, + { + "text": "-- \"Why they got a Mexican kid singing the national anthem -___-\" from Daniel Gilmore.", + "length": 86 + }, + { + "text": "-- \"Racist scumbag says Chicano kid singing nat'l anthem just \"snuck into the country.", + "length": 86 + }, + { + "text": "He said he owes his positive outlook to his parents, family and everyone in San Antonio.", + "length": 88 + }, + { + "text": "Juan de la Cruz, Sebastien's father, hasn't taken the negative comments personally at all.", + "length": 90 + }, + { + "text": "He's probably got more roots in here than most 'Americans',\" said Amanda Aguirre on Twitter.", + "length": 92 + }, + { + "text": "\"For those that said something bad about me, I understand it's your opinion,\" said Sebastien to CNN.", + "length": 100 + }, + { + "text": "Other media outlets used that post as the foundation for the story, and the story took off from there.", + "length": 102 + }, + { + "text": "@A2daO,\" from Laura Gonzalez, a Chicana Santa Rosa City Schools board member and middle school teacher.", + "length": 103 + }, + { + "text": "The blog posted screen shots of the tweets so it could continue to display the public microbloggers' rants.", + "length": 107 + }, + { + "text": "The \"boy with the golden voice\" tweeted earlier today: \"Please do not pay attention to the negative people.", + "length": 107 + 
}, + { + "text": "Here's a sampling of some of the unkind tweets that went flying around the Internet about Sebastien de la Cruz: .", + "length": 113 + }, + { + "text": "\" Sebastien said today was like any other day, but he's always grateful to wake up to yet another day able to sing.", + "length": 115 + }, + { + "text": "\" That the word \"Mexican\" is being used as a derogatory term is part of the problem, said Noriega, \"It's become the N-word of Mexican descent.", + "length": 142 + }, + { + "text": "\" A collection of the negative tweets was posted on Public Shaming, a Tumblr blog dedicated to outing and shaming racists' social media posts.", + "length": 143 + }, + { + "text": "-- \"How you singing the national anthem looking like an illegal immigrant\" from Andre Lacey, proud father and firefighter from Augusta, Georgia.", + "length": 144 + }, + { + "text": "For some Mexican-Americans, the incident was just the latest sign of a persistent problem they face: being treated as outsiders in their own country.", + "length": 149 + }, + { + "text": "-- And, from the American Latino Museum, \"We're proud of the 11-year old San Anto-native Sebastien de la Cruz for his amazing performance last night!", + "length": 149 + }, + { + "text": "After the harsh reaction spread across the Internet, tweets supporting and defending Sebastien and vociferously denouncing his critics started to take over: .", + "length": 158 + }, + { + "text": "-- \"That little 10 year old mariachi National Anthem singer has more talent and grace than the combined racist pig idiots on Twitter,\" from Mexican-American cartoonist Lalo Alcaraz .", + "length": 182 + }, + { + "text": "\" San Antonio is a multicultural city with more than 55% of the population being Hispanic and 90% of those people identifying themselves as Mexican according to the Pew Hispanic Center.", + "length": 185 + }, + { + "text": "However, Public Shaming must have suspected what would happen once the racist tweets were posted 
because most of the Twitter handles have been deactivated or the tweet has been removed.", + "length": 185 + }, + { + "text": "The blog highlighted 28 tweets from NBA fans who came off as offended and ashamed that the Spurs would allow the boy, who happened to have been born and raised in San Antonio, to sing the nation's anthem.", + "length": 204 + }, + { + "text": "It's a demographic fact that the country is changing and Latinos are going to be behind that economic push that moves everyone forward,\" said Chon Noriega, director of the UCLA Chicano Studies Research Center, to CNN.", + "length": 217 + }, + { + "text": "\"I think people reacted the way they did because Sebastien was wearing his mariachi outfit,\" said de la Cruz, \"But, it doesn't make sense to listen to those people when most of the feedback we have gotten is positive.", + "length": 217 + }, + { + "text": "\" Sebastien has been singing since he was 5 but gained fame in 2012 after being on NBC's \"America's Got Talent\" for singing his mariachi ballads with hopes of winning to help his younger brother get surgery for his hearing problem.", + "length": 231 + }, + { + "text": "An 11-year-old boy's rendition of the national anthem at Game 3 of the NBA finals brought the usual appreciative applause Tuesday, but outside AT\u0026T Center in San Antonio, his performance brought a darker reaction from some posters on social media -- and eventually an online backlash against their racist comments.", + "length": 314 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.572281539440155 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:03.832637904Z", + "first_section_created": "2025-12-23T09:34:03.833117923Z", + "last_section_published": "2025-12-23T09:34:03.833353233Z", + "all_results_received": "2025-12-23T09:34:03.897298009Z", + "output_generated": "2025-12-23T09:34:03.897505117Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 0, + 
"workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:03.833117923Z", + "publish_time": "2025-12-23T09:34:03.833353233Z", + "first_worker_start": "2025-12-23T09:34:03.834009559Z", + "last_worker_end": "2025-12-23T09:34:03.896314Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:03.833922956Z", + "start_time": "2025-12-23T09:34:03.834009559Z", + "end_time": "2025-12-23T09:34:03.834111563Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:03.834104Z", + "start_time": "2025-12-23T09:34:03.834246Z", + "end_time": "2025-12-23T09:34:03.896314Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:03.833925756Z", + "start_time": "2025-12-23T09:34:03.83401906Z", + "end_time": "2025-12-23T09:34:03.834125564Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:03.833864553Z", + "start_time": "2025-12-23T09:34:03.834009559Z", + "end_time": "2025-12-23T09:34:03.834055961Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + 
"total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4954, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/003b021e19cfb2a140336bbfa299911b110a900c.json b/data/output/003b021e19cfb2a140336bbfa299911b110a900c.json new file mode 100644 index 0000000..66b8d7a --- /dev/null +++ b/data/output/003b021e19cfb2a140336bbfa299911b110a900c.json @@ -0,0 +1,306 @@ +{ + "file_name": "003b021e19cfb2a140336bbfa299911b110a900c.txt", + "total_words": 839, + "top_n_words": [ + { + "word": "the", + "count": 39 + }, + { + "word": "and", + "count": 31 + }, + { + "word": "i", + "count": 22 + }, + { + "word": "is", + "count": 21 + }, + { + "word": "of", + "count": 19 + }, + { + "word": "to", + "count": 18 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "her", + "count": 16 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "she", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "' Still with us?", + "length": 16 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "'In fact, I don't wear any.", + "length": 27 + }, + { + "text": "' One for the Christmas stocking?", + "length": 33 + }, + { + "text": "' So what does 'beauty' mean to the star?", + "length": 41 + }, + { + "text": "'I've been doing this for a long time,' she explained.", + "length": 54 + }, + { + "text": "'The next year is looking good for me and I can’t wait.", + "length": 57 + }, + { + "text": "'The older you get, the less interested in make-up you are.", + 
"length": 59 + }, + { + "text": "'I am looking forward to getting back to work,' added the model.", + "length": 64 + }, + { + "text": "'My children and relaxing at home with them and having quality time.", + "length": 68 + }, + { + "text": "Vaseline is the only thing in my make-up bag to keep my lips moisturised.", + "length": 73 + }, + { + "text": "'I'm quite good at eating healthily but biscuits are my weakness,' she admits.", + "length": 78 + }, + { + "text": "'I think the name Kissable Fierce is quite reflective of my personality,' she explained.", + "length": 88 + }, + { + "text": "A smokey eye is ageing and with five kids, the last thing on my mind is make-up,' says Price.", + "length": 93 + }, + { + "text": "'My perfumes have been best sellers and I'm involved in every aspect of the production as well.", + "length": 95 + }, + { + "text": "'The day starts off well with a freshly made juice but it goes downhill, usually in the evening.", + "length": 96 + }, + { + "text": "'I’ve moved house, it’s a fresh start and I am eager to get back to doing what I do best - working.", + "length": 103 + }, + { + "text": "'This is the problem with my fitness - I come up with all these ideas to work out and then I literally don't have time.", + "length": 119 + }, + { + "text": "'It's my first evening fragrance that I've released and I love it,' she said in an exclusive interview with MailOnline.", + "length": 119 + }, + { + "text": "She might have just become a mother for the fifth time but it seems Katie Price, 36, has no intention of putting her feet up.", + "length": 125 + }, + { + "text": "But it is the name of the fragrance, Kissable Fierce, that most reflects Ms Price, who said it was inspired by her personality.", + "length": 127 + }, + { + "text": "She also used the glamorous night as a platform to show off her post-baby body, which, as photos reveal, is more toned than ever.", + "length": 129 + }, + { + "text": "' The perfume is, she says, an extension of her 
changing lifestyle and reflects the more mature, more sophisticated woman she is today.", + "length": 135 + }, + { + "text": "'I started creating Kissable Fierce a year ago and wanted to bring out an affordable, long lasting evening perfume that my fans would like.", + "length": 139 + }, + { + "text": "'I'm the most loyal person and fun-loving and kind but if people take advantage of those good qualities I can have a fierce side that comes out too.", + "length": 148 + }, + { + "text": "And despite being one of a veritable deluge of celebrity fragrances, Price says hers is different - and it's all down to her experience of the showbiz world.", + "length": 157 + }, + { + "text": "' Back on track: Katie Price and Kieran Hayler are looking to the future and Katie says true beauty is spending time with her children and relaxing at home .", + "length": 157 + }, + { + "text": "Moving on: Katie, pictured at the launch of her last fragrance, says her latest creation is her best yet and she wanted to create something that her fans would love .", + "length": 166 + }, + { + "text": "Family is clearly important to Price, who has managed to overcome husband Kieran's infidelity and presented a united front at the recent Hunger Games premiere in London.", + "length": 169 + }, + { + "text": "Katie's new fragrance is just the latest in a roster of celebrity scents but she maintains that hers is different and she apparently gets plenty of compliments when she wears it .", + "length": 179 + }, + { + "text": "Indeed, the 36-year-old claims that she's so disinterested in beautifying herself - apart from for public appearances - that the only thing in her make-up bag is a tub of Vaseline.", + "length": 180 + }, + { + "text": "' Price, 36, who gave birth to her fifth child in August, also said that not only are the orange and peach notes 'gorgeous', she's been getting plenty of compliments while wearing it.", + "length": 184 + }, + { + "text": "' Au natural: The 36-year-old maintains that 
she's so disinterested in beautifying herself these days - apart from for public appearances - that the only thing in her make-up bag is a tub of Vaseline .", + "length": 202 + }, + { + "text": "Between writing a new book, signing up with a hair colour brand and slamming Kim Kardashian's naked magazine cover, the former glamour model has found time to create a new fragrance named 'Kissable Fierce'.", + "length": 206 + }, + { + "text": "Good, because she is also working on a photo call for the perfume, penning her next tome, celebrating Superdrug’s 50th Anniversary with the Medichem team, doing 'something that hasn't been done before' and heading over to Australia again - all in the next few months.", + "length": 269 + }, + { + "text": "New look: Katie Price is unveiling her latest fragrance, Kissable Fierce, and says it's reflective of her personality and smells 'gorgeous' 'It's an extension of my Kissable line - the bottle is so cool and I love the fact it's pink and black - two of my favourite colours.", + "length": 274 + }, + { + "text": "'I'm working on the release of Kissable Fierce, I'm Global Ambassador for CB4, I've also got to factor in writing my next autobiography, which I'm doing at the moment, and then TV work and my TV role for Channel 7 in Australia plus the kids, my husband and family and friends.", + "length": 276 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.43836918473243713 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:04.334199607Z", + "first_section_created": "2025-12-23T09:34:04.334568122Z", + "last_section_published": "2025-12-23T09:34:04.334900735Z", + "all_results_received": "2025-12-23T09:34:04.400366972Z", + "output_generated": "2025-12-23T09:34:04.40056268Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": 
"2025-12-23T09:34:04.334568122Z", + "publish_time": "2025-12-23T09:34:04.334900735Z", + "first_worker_start": "2025-12-23T09:34:04.335323452Z", + "last_worker_end": "2025-12-23T09:34:04.3995Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:04.335362454Z", + "start_time": "2025-12-23T09:34:04.335445157Z", + "end_time": "2025-12-23T09:34:04.335557662Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:04.335552Z", + "start_time": "2025-12-23T09:34:04.335702Z", + "end_time": "2025-12-23T09:34:04.3995Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:04.335356154Z", + "start_time": "2025-12-23T09:34:04.335440057Z", + "end_time": "2025-12-23T09:34:04.335551162Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:04.33525435Z", + "start_time": "2025-12-23T09:34:04.335323452Z", + "end_time": "2025-12-23T09:34:04.335375055Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4558, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/003b023fa2971dfe522f39f0c36370787ff52aca.json b/data/output/003b023fa2971dfe522f39f0c36370787ff52aca.json new file mode 100644 index 0000000..bb102e6 --- /dev/null +++ b/data/output/003b023fa2971dfe522f39f0c36370787ff52aca.json @@ -0,0 +1,290 @@ +{ + "file_name": "003b023fa2971dfe522f39f0c36370787ff52aca.txt", + "total_words": 412, + "top_n_words": [ + { + "word": "the", + "count": 23 + }, + { + "word": "i", + "count": 13 + }, + { + "word": "kitten", + "count": 13 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "it", + "count": 11 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "to", + "count": 9 + }, + { + "word": "is", + "count": 8 + }, + { + "word": "was", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "James Rush .", + "length": 12 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "12:47 EST, 1 August 2013 .", + "length": 26 + }, + { + "text": "14:55 EST, 1 August 2013 .", + "length": 26 + }, + { + "text": "I think he swallowed some oil.", + "length": 30 + }, + { + "text": "'I suppose I saved it’s life.", + "length": 31 + }, + { + "text": "'It is scary that people could do this.", + "length": 39 + }, + { + "text": "' The kitten is thought to be about six weeks old.", + "length": 50 + }, + { + "text": "I think the man that rescured the kitten is a hero.", + 
"length": 51 + }, + { + "text": "We will look after the kitten until we can home it.", + "length": 51 + }, + { + "text": "They may have been covering it in oil to set it alight.", + "length": 55 + }, + { + "text": "' The youths are believed to have been about 14-years-old.", + "length": 58 + }, + { + "text": "'I did not think it would make it through the first night.", + "length": 58 + }, + { + "text": "He said: 'I was working in Chester-le-Street and was lost.", + "length": 58 + }, + { + "text": "'He may be alright, but problems may arise in the next few days.", + "length": 64 + }, + { + "text": "I pulled up to make a phone call and saw the youths messing about.", + "length": 66 + }, + { + "text": "' Anyone who can help home the kitten is asked to call 0845 313 4749.", + "length": 69 + }, + { + "text": "The kitten was traumatised - these people are just imbeciles and cruel.", + "length": 71 + }, + { + "text": "'I got out and took the kitten off them while they shouted abuse at me.", + "length": 71 + }, + { + "text": "'Thankfully incidents like this are rare, but we are seeing an increase in them.", + "length": 80 + }, + { + "text": "This is the first case I have had where someone has tried to drown a kitten in oil.", + "length": 83 + }, + { + "text": "A gang of teenagers left a kitten 'traumatised' after they tried to drown it in oil.", + "length": 84 + }, + { + "text": "Rescued: Chester with Marion Maychell, volunteer co-ordinator at Wear Valley and Darlington Cats Protection .", + "length": 109 + }, + { + "text": "The kitten, which has been named Chester, is now being cared for at Wear Valley and Darlington Cats Protection.", + "length": 111 + }, + { + "text": "It was lucky I was in a van and higher up so I saw it, because if i had been in a car then I would not have seen it.", + "length": 116 + }, + { + "text": "Vet Jeremy Demeyre, of the Wilson Veterinary Group, in Bishop Auckland, said: 'We washed the kitten and clipped its fur.", + "length": 120 + }, + { + 
"text": "Marion Maychell, a volunteer co-ordinator at the cat rescue charity, said: 'We are hoping that the kitten is going to be ok.", + "length": 124 + }, + { + "text": "'Traumatised': Chester the kitten (pictured shortly after being rescued) was dumped into a bucket of oil by a group of teenagers .", + "length": 130 + }, + { + "text": "The gang is also suspected of planning on setting the six-week-old kitten on fire before a passer-by intervened and saved the cat's life.", + "length": 137 + }, + { + "text": "The man, who did not want to be named, was driving through Chester-le-Street, in County Durham, when he saw the teenagers 'messing about'.", + "length": 138 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5161707401275635 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:04.835724809Z", + "first_section_created": "2025-12-23T09:34:04.837848294Z", + "last_section_published": "2025-12-23T09:34:04.838041802Z", + "all_results_received": "2025-12-23T09:34:04.91419827Z", + "output_generated": "2025-12-23T09:34:04.914337975Z", + "total_processing_time_ms": 78, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 76, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:04.837848294Z", + "publish_time": "2025-12-23T09:34:04.838041802Z", + "first_worker_start": "2025-12-23T09:34:04.838603825Z", + "last_worker_end": "2025-12-23T09:34:04.913134Z", + "total_journey_time_ms": 75, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:04.838588924Z", + "start_time": "2025-12-23T09:34:04.838664827Z", + "end_time": "2025-12-23T09:34:04.838719429Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:04.838841Z", + "start_time": "2025-12-23T09:34:04.838983Z", + "end_time": 
"2025-12-23T09:34:04.913134Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 74 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:04.838607925Z", + "start_time": "2025-12-23T09:34:04.838678728Z", + "end_time": "2025-12-23T09:34:04.83873303Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:04.838545022Z", + "start_time": "2025-12-23T09:34:04.838603825Z", + "end_time": "2025-12-23T09:34:04.838625226Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 74, + "min_processing_ms": 74, + "max_processing_ms": 74, + "avg_processing_ms": 74, + "median_processing_ms": 74, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2176, + "slowest_section_id": 0, + "slowest_section_time_ms": 75 + } +} diff --git a/data/output/003b063ae76e599811604adf5df48d191004c197.json b/data/output/003b063ae76e599811604adf5df48d191004c197.json new file mode 100644 index 0000000..1c243d7 --- /dev/null +++ 
b/data/output/003b063ae76e599811604adf5df48d191004c197.json @@ -0,0 +1,314 @@ +{ + "file_name": "003b063ae76e599811604adf5df48d191004c197.txt", + "total_words": 716, + "top_n_words": [ + { + "word": "the", + "count": 34 + }, + { + "word": "in", + "count": 26 + }, + { + "word": "to", + "count": 22 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "with", + "count": 15 + }, + { + "word": "malia", + "count": 14 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "obama", + "count": 12 + }, + { + "word": "on", + "count": 12 + }, + { + "word": "at", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "'They love Jay Z.", + "length": 17 + }, + { + "text": "They love Beyonce.", + "length": 18 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "She's also learning how to drive.", + "length": 33 + }, + { + "text": "Lorde played her set on Friday night.", + "length": 37 + }, + { + "text": "Kelly came on stage, on Sunday night.", + "length": 37 + }, + { + "text": "A regular three-day ticket costs $250.", + "length": 38 + }, + { + "text": "Around 300,000 people attended the three-day event .", + "length": 52 + }, + { + "text": "A burly Secret Service agent can also be seen to her left .", + "length": 59 + }, + { + "text": "First daughter Malia Obama was spotted in the crowd at the gig .", + "length": 64 + }, + { + "text": "She left after watching the last act Chance The Rapper, where R.", + "length": 64 + }, + { + "text": "Singer Lorde pictured performing at Lollapalooza in Chicago on Friday.", + "length": 70 + }, + { + "text": "Music fans took to Twitter to share their excitement over seeing the first daughter.", + "length": 84 + }, + { + "text": "Malia Obama was spotted attending music festival Lollapalooza in Chicago this weekend.", + "length": 86 + }, + { + "text": "Malia goes into the 11th grade this fall at the private Sidwell Friends School in Washington.", + "length": 93 + }, + { + "text": "'They've gotten to know 
Beyonce, and she has always been very sweet to them,' Mr Obama added.", + "length": 93 + }, + { + "text": "Fans listen to Manchester Orchestra on Saturday on the second day of Lollapalooza in Chicago.", + "length": 93 + }, + { + "text": "One teenager, Sarah, tweeted a selfie with Malia with the caption: 'J chillin with Malia Obama.", + "length": 95 + }, + { + "text": "Headline acts included Kings Of Leon, Eminem, Outkast, Arctic Monkeys, Skrillex and Calvin Harris.", + "length": 98 + }, + { + "text": "The music-loving 16-year-old was ready for the muddy turf at the all-day gig in a pair of sturdy boots.", + "length": 103 + }, + { + "text": "The teenager stayed until the finale was over and then left the park on foot with friends, according to TMZ.", + "length": 108 + }, + { + "text": "One disappointed fan tweeted this message after seeing the 16-year-old hanging out with friends at Lollapalooza .", + "length": 113 + }, + { + "text": "Another, JD, tweeted: 'I asked Malia Obama for a picture and she said she wasn't allowed to take pictures with people.", + "length": 118 + }, + { + "text": "' Lollapalooza took place over three days at Grant Park in Chicago from July 31 with as many as 300,000 people in attendance.", + "length": 125 + }, + { + "text": "In an interview with People magazine in December, Michelle Obama revealed that her daughters are big fans of Beyonce's music.", + "length": 125 + }, + { + "text": "First daughter Malia Obama, who recently turned 16, had fun with friends this weekend at music festival Lollapalooza in Chicago .", + "length": 129 + }, + { + "text": "' Malia was pictured at one point in the crowd standing next to a burly man dressed in black believed to be Secret Service, TMZ reported.", + "length": 137 + }, + { + "text": "The teen did her best to blend in with other young people a the all-day music event but her whereabouts were eventually outed on Twitter .", + "length": 138 + }, + { + "text": "Malia Obama looked like any other 
festival-goer this weekend in a summery skirt and top teamed with heavy boots, perfect for muddy fields .", + "length": 139 + }, + { + "text": "Fans walk by the Lollapalooza sign in Chicago's Grant Park on Saturday where members of the audience were excited to see Malia Obama watching bands .", + "length": 149 + }, + { + "text": "The 16-year-old (pictured center here) surrounded by other teens appears to be having a fun-packed summer while on vacation from her private DC school .", + "length": 152 + }, + { + "text": "The 16-year-old has also been touring different colleges in California, including Berkeley and Stanford, as she prepares to head off to college in 2016.", + "length": 152 + }, + { + "text": "Sasha is still a One Direction fan, although I think Malia has become a little less into boy bands,' Mrs Obama said in a joint interview for People Magazine.", + "length": 157 + }, + { + "text": "The teenager attempted to blend in with the crowd in a summery daisy-print skirt and top and some grungy boots suited for the muddy field - but failed to do so.", + "length": 160 + }, + { + "text": "Excited music fans took to Twitter after spotting the first daughter trying to blend in with the crowd at the music festival this weekend in Grant Park, Chicago .", + "length": 162 + }, + { + "text": "President Obama's eldest daughter, who recently celebrated her 16th birthday, was attending the gig with friends - and a few burly Secret Service members on Sunday.", + "length": 164 + }, + { + "text": "' One journalist Jessica Hopper tweeted that she had seen the 16-year-old watching Lorde, writing: 'i was standing next to malia obama (plus secret service) = total utopian teen dream!", + "length": 184 + }, + { + "text": "Malia is an avid music fan - last month she went to see Beyonce and Jay-Z in Chicago with mom Michelle and younger sister Sasha, 13, while the First Lady was in town for a Democratic fundraiser.", + "length": 194 + }, + { + "text": "Already this summer, Malia has 
worked in Los Angeles as a production assistant on the set of 'Extant,' a new CBS sci-fi thriller starring Halle Berry and produced by Democratic Party donor Steven Spielberg.", + "length": 206 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.563034176826477 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:05.338425758Z", + "first_section_created": "2025-12-23T09:34:05.339857116Z", + "last_section_published": "2025-12-23T09:34:05.340090725Z", + "all_results_received": "2025-12-23T09:34:05.405386755Z", + "output_generated": "2025-12-23T09:34:05.405560162Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:05.339857116Z", + "publish_time": "2025-12-23T09:34:05.340090725Z", + "first_worker_start": "2025-12-23T09:34:05.340657348Z", + "last_worker_end": "2025-12-23T09:34:05.40449Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:05.340638247Z", + "start_time": "2025-12-23T09:34:05.340734451Z", + "end_time": "2025-12-23T09:34:05.340814254Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:05.34084Z", + "start_time": "2025-12-23T09:34:05.340978Z", + "end_time": "2025-12-23T09:34:05.40449Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:05.340600846Z", + "start_time": "2025-12-23T09:34:05.340680849Z", + "end_time": "2025-12-23T09:34:05.340768252Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:05.340584845Z", + "start_time": "2025-12-23T09:34:05.340657348Z", + "end_time": 
"2025-12-23T09:34:05.34070725Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4104, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/003b856704634429282caa28ef8b0c052e04bf18.json b/data/output/003b856704634429282caa28ef8b0c052e04bf18.json new file mode 100644 index 0000000..c5d3a4a --- /dev/null +++ b/data/output/003b856704634429282caa28ef8b0c052e04bf18.json @@ -0,0 +1,234 @@ +{ + "file_name": "003b856704634429282caa28ef8b0c052e04bf18.txt", + "total_words": 387, + "top_n_words": [ + { + "word": "the", + "count": 23 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "a", + "count": 8 + }, + { + "word": "for", + "count": 8 + }, + { + "word": "he", + "count": 8 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "pacquiao", + "count": 7 + }, + { + "word": "and", + "count": 6 + }, + { + 
"word": "i", + "count": 6 + }, + { + "word": "me", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "All I did was do my job.", + "length": 24 + }, + { + "text": "This helps me, it helps a lot.", + "length": 30 + }, + { + "text": "People ridiculed me, demonized me.", + "length": 34 + }, + { + "text": "\"I always think positive and not negative.", + "length": 42 + }, + { + "text": "\"It was like I stole something from the world that night.", + "length": 57 + }, + { + "text": "\"My mind is set in the winners side not on the losers side,\" he said.", + "length": 69 + }, + { + "text": "\"He said I don't have the killer instinct any more, I don't have the aggressiveness any more.", + "length": 93 + }, + { + "text": "\" \"This fight on Saturday will be to prove that my journey in boxing will continue and I'm excited for that.", + "length": 108 + }, + { + "text": "\"What really motivates me for this fight is what my opponent said,\" Pacquiao countered in riposte to Bradley's claims.", + "length": 118 + }, + { + "text": "Pacquiao has won world titles at seven weight divisions in a career stretching 18 years but has lost two of his last three fights.", + "length": 130 + }, + { + "text": "He questioned Pacquiao's hunger at the pre-fight press conference Thursday, suggesting the legendary Filipino's best days are behind him.", + "length": 137 + }, + { + "text": "\" Bradley insists these dark experiences made him stronger as a person and will act as the perfect motivation ahead of the MGM Grand rematch.", + "length": 141 + }, + { + "text": "The WBO welterweight champion won a contentious points decision when the pair met in June 2012, inflicting a first defeat on Pacquiao in seven years.", + "length": 149 + }, + { + "text": "Bradley claims he subsequently received death threats and that he was unfairly targeted by fans and the media alike for the decision of the ringside judges.", + "length": 156 + }, + { + "text": "Boxing commentators roundly criticized the result 
while former heavyweight champion Lennox Lewis said the scoring showed that boxing had lost its integrity.", + "length": 156 + }, + { + "text": "\"It would mean the world to me to get this victory for me and my family because of everything we went through in the past,\" the 30-year old told CNN's Don Riddell.", + "length": 163 + }, + { + "text": "\" Although he recorded a comfortable victory over Brandon Rios last November, Pacquiao was knocked out for the first time since 1999 by Juan Manuel Marquez in his previous bout.", + "length": 177 + }, + { + "text": "(CNN) -- Timothy Bradley says he needs to beat Manny Pacquiao for a second time in Las Vegas on Saturday to move on from the controversial conclusion of their first fight two years ago.", + "length": 185 + }, + { + "text": "At 35, it would be difficult for Pacquiao to resurrect his career at the very highest level if defeated by Bradley for a second time but he refuses to entertain this notion ahead of the contest.", + "length": 194 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5061115026473999 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:05.840889097Z", + "first_section_created": "2025-12-23T09:34:05.842786174Z", + "last_section_published": "2025-12-23T09:34:05.842965981Z", + "all_results_received": "2025-12-23T09:34:05.902692187Z", + "output_generated": "2025-12-23T09:34:05.902851693Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:05.842786174Z", + "publish_time": "2025-12-23T09:34:05.842965981Z", + "first_worker_start": "2025-12-23T09:34:05.843510303Z", + "last_worker_end": "2025-12-23T09:34:05.901694Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:05.843485402Z", + 
"start_time": "2025-12-23T09:34:05.843531504Z", + "end_time": "2025-12-23T09:34:05.843579706Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:05.843677Z", + "start_time": "2025-12-23T09:34:05.843824Z", + "end_time": "2025-12-23T09:34:05.901694Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:05.8434465Z", + "start_time": "2025-12-23T09:34:05.843510303Z", + "end_time": "2025-12-23T09:34:05.843575206Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:05.843456301Z", + "start_time": "2025-12-23T09:34:05.843510503Z", + "end_time": "2025-12-23T09:34:05.843534104Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + 
"total_sections": 1, + "average_section_size": 2179, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/003bb408e41ceb2d537a74746c69d7dbd9f1da45.json b/data/output/003bb408e41ceb2d537a74746c69d7dbd9f1da45.json new file mode 100644 index 0000000..403e638 --- /dev/null +++ b/data/output/003bb408e41ceb2d537a74746c69d7dbd9f1da45.json @@ -0,0 +1,258 @@ +{ + "file_name": "003bb408e41ceb2d537a74746c69d7dbd9f1da45.txt", + "total_words": 473, + "top_n_words": [ + { + "word": "the", + "count": 23 + }, + { + "word": "he", + "count": 18 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "his", + "count": 10 + }, + { + "word": "lyle", + "count": 10 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "i", + "count": 8 + }, + { + "word": "s", + "count": 8 + }, + { + "word": "of", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "'It was easy for me.", + "length": 20 + }, + { + "text": "But he needn't have worried.", + "length": 28 + }, + { + "text": "'He's not that much older than myself.", + "length": 38 + }, + { + "text": "'I'd love to meet him some day,' Lyle said.", + "length": 43 + }, + { + "text": "I just can't imagine what he's going through.", + "length": 45 + }, + { + "text": "'He's my hero,' his mom Christine Sciacca said.", + "length": 47 + }, + { + "text": "The decision, however, was a no-brainer, he said.", + "length": 49 + }, + { + "text": "'I was pretty terrified at first, but it is starting to settle in.", + "length": 66 + }, + { + "text": "'I couldn't be more proud of him and how he's been so humble about it.", + "length": 70 + }, + { + "text": "They gave me the timeline and everything's been moving quickly after that.", + "length": 74 + }, + { + "text": "'I knew right away I was definitely going to donate,' Lyle told the Eagle Tribune.", + "length": 82 + }, + { + "text": "The athlete's mother and team coach were both extremely supportive of his decision.", + 
"length": 83 + }, + { + "text": "'They told me it was a one in five million chance of me being a match for a non-family member.", + "length": 94 + }, + { + "text": "' Lyle is undergoing the surgery at Massachusetts General Hospital in Boston later this month.", + "length": 94 + }, + { + "text": "Easy decision: The decision to try save the 28-year-old man's life was a no-brainer, Lyle, pictured, said .", + "length": 107 + }, + { + "text": "'He has six months to live and I have the possibility to buy him a couple more years,' Lyle told the newspaper.", + "length": 111 + }, + { + "text": "After a year, Lyle and the recipient will both have the choice to sign consent forms to reveal their identities.", + "length": 112 + }, + { + "text": "' But Lyle said he felt like he had been called into the principal's office when he went to tell his coach, Jim Boulanger, the news.", + "length": 132 + }, + { + "text": "' The recipient, who is suffering from acute lymphoblastic leukemia, and Lyle are required by law to remain anonymous to each other for one year.", + "length": 145 + }, + { + "text": "'I told him, \"you either do 12 throws at the conference championships, or you give another man a few more years,\"' Boulanger told the Eagle Tribune.", + "length": 148 + }, + { + "text": "Sacrifice: Mr Lyle, pictured, is being hailed a hero after he quit his final season so he can donate bone marrow to a stranger suffering from leukemia .", + "length": 152 + }, + { + "text": "But donating his bone marrow to the anonymous recipient means he'll have to miss the final two meets of his career, including the America East Championships.", + "length": 157 + }, + { + "text": "A University of New Hampshire shot put star is being hailed a hero after he quit his final athletics season so he can donate bone marrow to a stranger suffering leukemia.", + "length": 170 + }, + { + "text": "But that responsibility means he'll be unable to lift more than 20 pounds over his head for a few weeks, ruling out 
throwing the discus, hammer and shot put in the championships.", + "length": 178 + }, + { + "text": "Like the rest of his track and field team, Cameron Lyle, 21, of Plaistow, joined the bone marrow registry in his sophomore year and a few weeks ago, he learned he was a 100 per cent match for a 28-year-old man who has just been given six months to live.", + "length": 253 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.45104295015335083 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:06.343535544Z", + "first_section_created": "2025-12-23T09:34:06.343878158Z", + "last_section_published": "2025-12-23T09:34:06.344061665Z", + "all_results_received": "2025-12-23T09:34:06.408364456Z", + "output_generated": "2025-12-23T09:34:06.408497461Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:06.343878158Z", + "publish_time": "2025-12-23T09:34:06.344061665Z", + "first_worker_start": "2025-12-23T09:34:06.344629688Z", + "last_worker_end": "2025-12-23T09:34:06.407509Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:06.344588887Z", + "start_time": "2025-12-23T09:34:06.344645989Z", + "end_time": "2025-12-23T09:34:06.344691191Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:06.344786Z", + "start_time": "2025-12-23T09:34:06.344913Z", + "end_time": "2025-12-23T09:34:06.407509Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:06.344571386Z", + "start_time": "2025-12-23T09:34:06.344629688Z", + "end_time": "2025-12-23T09:34:06.344686791Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 
0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:06.344570786Z", + "start_time": "2025-12-23T09:34:06.344635489Z", + "end_time": "2025-12-23T09:34:06.344656589Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2520, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/003bd771797a846c15fa050924028506c0181e0b.json b/data/output/003bd771797a846c15fa050924028506c0181e0b.json new file mode 100644 index 0000000..81af2b1 --- /dev/null +++ b/data/output/003bd771797a846c15fa050924028506c0181e0b.json @@ -0,0 +1,416 @@ +{ + "file_name": "003bd771797a846c15fa050924028506c0181e0b.txt", + "total_words": 971, + "top_n_words": [ + { + "word": "the", + "count": 65 + }, + { + "word": "of", + "count": 30 + }, + { + "word": "in", + "count": 21 + }, + { + "word": "that", + 
"count": 21 + }, + { + "word": "a", + "count": 20 + }, + { + "word": "his", + "count": 20 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "was", + "count": 15 + }, + { + "word": "they", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "'We'll do it.", + "length": 13 + }, + { + "text": "We will do it.", + "length": 14 + }, + { + "text": "Meghan Keneally .", + "length": 17 + }, + { + "text": "What is she hiding?", + "length": 19 + }, + { + "text": "A family is a family.", + "length": 21 + }, + { + "text": "08:37 EST, 3 May 2013 .", + "length": 23 + }, + { + "text": "15:12 EST, 3 May 2013 .", + "length": 23 + }, + { + "text": "and not be sent back to Russia.", + "length": 31 + }, + { + "text": "police was part of a conspiracy.", + "length": 32 + }, + { + "text": "Mr Stefan added that all of the .", + "length": 33 + }, + { + "text": "In this country, you bury the dead.", + "length": 35 + }, + { + "text": "nondescript headstone might be best.", + "length": 36 + }, + { + "text": "Tsarnaev's family also said that they .", + "length": 39 + }, + { + "text": "The charges were dismissed before trial.", + "length": 40 + }, + { + "text": "'They can protest but again what do you do?", + "length": 43 + }, + { + "text": "cemeteries in the state of Massachusetts have refused to use one of .", + "length": 69 + }, + { + "text": "government rulings, fearing that his death during a gun battle with .", + "length": 69 + }, + { + "text": "would be ordering a second autopsy as they do not trust the American .", + "length": 70 + }, + { + "text": "their plots for the terrorist, with the suggestion that a low-profile .", + "length": 71 + }, + { + "text": "have prior shoplifting convictions, Russell's in 2007 and his 
mother's in 2012 .", + "length": 80 + }, + { + "text": "'Of course, family members will take possession of the body,' his uncle Tsarni said.", + "length": 84 + }, + { + "text": "Katherine Russell, the widow of Tamerlan, reportedly stopped cooperating with police .", + "length": 86 + }, + { + "text": "Fellow convictions: Both Tamerlan's wife Katherine Russell (left) and his mother Zubeidat K.", + "length": 92 + }, + { + "text": "He admitted to slapping her and was arrested on charges of assault and battery, the complaint said.", + "length": 99 + }, + { + "text": "Around 11 months after the alleged slapping incident, Tsarnaev, 26, married his wife, Katherine Russell.", + "length": 104 + }, + { + "text": "I can't control the circumstances of somebody's death or what they've done or how they died,' he told Fox News.", + "length": 111 + }, + { + "text": "Not only that, each of the people who lost a limb in the attack could expect to receive close to $1million as well.", + "length": 115 + }, + { + "text": "Tsarnaev's body was released to a local funeral home on Thursday after his family made arrangements to schedule a funeral.", + "length": 122 + }, + { + "text": "The final number of people who lost a limb- some of whom lost more than one- has not been tallied but it could be upwards of 20 people.", + "length": 135 + }, + { + "text": "Other relatives of the bomber confirmed that they would be arranging a funeral for the 26-year-old and that his body would remain in the U.", + "length": 139 + }, + { + "text": "Outraged: Garrett Plath, right, holds a sign and Toni Zagami, left, wears a 'Boston Strong' shirt as they stand outside the Dyer-Lake Funeral Home .", + "length": 148 + }, + { + "text": "Medical examiners only ever release suspects bodies after they have determined the cause of death, but that verdict has not been publicly disclosed yet.", + "length": 152 + }, + { + "text": "A lawyer overseeing the fund dedicated to the victims announced that the families of the 
three who died could receive more than $1million in compensation.", + "length": 154 + }, + { + "text": "The mugshot of the Boston Marathon bomber Tamerlan Tsarnaev was released today giving proof that his violent streak started years before the fatal blasts.", + "length": 154 + }, + { + "text": "The only ones who have received some sliver of good news in the past few days were the relatives of the three victims who died in the blasts at the marathon finish line.", + "length": 169 + }, + { + "text": "Ms Ascencao was 'crying hysterically' and called 911 to report that she was 'beat up by her boyfriend', Cambridge Police Officer Angela Pereira wrote in the arrest report.", + "length": 171 + }, + { + "text": "The girlfriend: The blonde woman in this photo with Tamerlan was identified by The National Enquirer as being Nadine Ascencao, the woman who filed the charges against him .", + "length": 172 + }, + { + "text": "Tsarnaev was booked in 2009 for hitting his then-girlfriend following an argument about another woman, prompting assault and battery charges that were dismissed before trial.", + "length": 174 + }, + { + "text": "' The move outraged Massachusetts residents, whose lives were terrorized for the week after the bombing as local, state and federal officials searched for Tamerlan and Dzhokhar.", + "length": 177 + }, + { + "text": "Ms Russell is now the one gaining the attention of police as they are trying to determine what role, if any, she played in helping plan, execute or cover up the attacks on April 15.", + "length": 181 + }, + { + "text": "The New York Times cites federal sources who say that they are 'skeptical' about her prior claims to have had nothing to do with the bombings considering that she is not cooperating.", + "length": 182 + }, + { + "text": "Criminal past: Tamerlan Tsarnaev was arrested for assault and battery in 2009 after slapping his girlfriend during an argument, though the charges were dismissed before going to trial .", + "length": 185 
+ }, + { + "text": "Years before he allegedly opened fire on police alongside his younger brother Dzhokhar as they were on the run from authorities, the first apparent victim of his abuse was his girlfriend.", + "length": 187 + }, + { + "text": "The hearse transporting his body was met with protesters throughout its journey, but funeral director Peter Stefan said that he was just doing his professional duty by making the arrangements.", + "length": 192 + }, + { + "text": "Though she was initially helpful, agreeing to be interviewed for hours and giving a DNA sample to test against a trace of female DNA found on one of the bombs, she has reportedly stopped cooperating with authorities.", + "length": 216 + }, + { + "text": "At around the time they were dating, Tsarnaev posed for a series of photos in a boxing ring, and Ms Ascencao was identified by the National Enquirer as being the blonde seen wrapping his gloves in one of the pictures.", + "length": 217 + }, + { + "text": "Nadine Ascencao, 25, accused him of slapping her on July 28, 2009 during an argument over another woman - presumably Katherine Russell, whom he met at a club that year and went on to marry in 2010 - at his home in Cambridge.", + "length": 224 + }, + { + "text": "The mugshot photos come the day after it was announced that his body will be buried in Boston in spite of local outrage that his final resting place will be within the bounds of the city that he terrorized with pressure cooker bombs that killed three and injured more than 250 others.", + "length": 284 + }, + { + "text": "Doing his duty: Peter Stefan, the funeral director and owner of Graham, Putnam and Mahoney Funeral Parlors, said that he can't help what Tsarnaev did but 'in this country, we bury the dead' A vehicle believed to be carrying the body of Boston Marathon bombing suspect Tamerlan Tsarnaev backs into an underground garage at the Dyer-Lake Funeral Home .", + "length": 350 + } + ], + "sentiment": { + "sentiment": "negative", + 
"score": 0.6274566948413849 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:06.844865838Z", + "first_section_created": "2025-12-23T09:34:06.845262754Z", + "last_section_published": "2025-12-23T09:34:06.845833877Z", + "all_results_received": "2025-12-23T09:34:06.942572974Z", + "output_generated": "2025-12-23T09:34:06.942840385Z", + "total_processing_time_ms": 97, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 96, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:06.845262754Z", + "publish_time": "2025-12-23T09:34:06.845620369Z", + "first_worker_start": "2025-12-23T09:34:06.846045686Z", + "last_worker_end": "2025-12-23T09:34:06.941678Z", + "total_journey_time_ms": 96, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:06.846037285Z", + "start_time": "2025-12-23T09:34:06.846103988Z", + "end_time": "2025-12-23T09:34:06.846209192Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:06.846276Z", + "start_time": "2025-12-23T09:34:06.846427Z", + "end_time": "2025-12-23T09:34:06.941678Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 95 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:06.845979883Z", + "start_time": "2025-12-23T09:34:06.846056486Z", + "end_time": "2025-12-23T09:34:06.846177391Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:06.845986283Z", + "start_time": "2025-12-23T09:34:06.846045686Z", + "end_time": "2025-12-23T09:34:06.846083987Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:06.845704972Z", + "publish_time": "2025-12-23T09:34:06.845833877Z", + "first_worker_start": 
"2025-12-23T09:34:06.846257494Z", + "last_worker_end": "2025-12-23T09:34:06.929196Z", + "total_journey_time_ms": 83, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:06.846225693Z", + "start_time": "2025-12-23T09:34:06.846257494Z", + "end_time": "2025-12-23T09:34:06.846271695Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:06.846468Z", + "start_time": "2025-12-23T09:34:06.846606Z", + "end_time": "2025-12-23T09:34:06.929196Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 82 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:06.846233793Z", + "start_time": "2025-12-23T09:34:06.846267595Z", + "end_time": "2025-12-23T09:34:06.846287295Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:06.846317497Z", + "start_time": "2025-12-23T09:34:06.846342798Z", + "end_time": "2025-12-23T09:34:06.846349298Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 177, + "min_processing_ms": 82, + "max_processing_ms": 95, + "avg_processing_ms": 88, + "median_processing_ms": 95, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + 
"worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2827, + "slowest_section_id": 0, + "slowest_section_time_ms": 96 + } +} diff --git a/data/output/003be5a8108fcd3f133ddef14a5fc593701566f3.json b/data/output/003be5a8108fcd3f133ddef14a5fc593701566f3.json new file mode 100644 index 0000000..6934e45 --- /dev/null +++ b/data/output/003be5a8108fcd3f133ddef14a5fc593701566f3.json @@ -0,0 +1,218 @@ +{ + "file_name": "003be5a8108fcd3f133ddef14a5fc593701566f3.txt", + "total_words": 369, + "top_n_words": [ + { + "word": "the", + "count": 27 + }, + { + "word": "s", + "count": 14 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "china", + "count": 9 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "lunar", + "count": 8 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "landing", + "count": 7 + }, + { + "word": "moon", + "count": 7 + }, + { + "word": "for", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "com, a space news site.", + "length": 23 + }, + { + "text": "Timeline: China's race into space .", + "length": 35 + }, + { + "text": "China sets course for lunar landing this year .", + "length": 47 + }, + { + "text": "Earlier missions included plotting a high-resolution, full-coverage lunar map.", + "length": 78 + }, + { + "text": "The slow-moving rover will patrol the moon's surface for at least three months, according to Xinhua.", + "length": 100 + }, + { + "text": "In 2010, China's previous lunar mission captured images of the crater while scouting potential landing sites for the 2013 probe.", + "length": 128 + }, + { + "text": "In the United States, scientists are concerned the Chinese mission could interfere with a NASA study of the moon's dust environment.", + "length": 132 + }, + { + "text": 
"The mission constitutes the second phase of China's moon exploration program which includes orbiting, landing and returning to Earth.", + "length": 133 + }, + { + "text": "A public poll determined the the solar-powered robot's name, which comes from the white pet rabbit of the Chinese moon godess Chang'e.", + "length": 134 + }, + { + "text": "China is yet to announce the probe's preferred landing site, but researchers say an impact crater named Sinus Iridum, or Bay of Rainbows, is its likely destination.", + "length": 164 + }, + { + "text": "The unmanned mission marks China's first attempt at a soft-landing on the lunar surface and the first soft-landing on the moon since the Soviet Luna 24 probe in 1976.", + "length": 166 + }, + { + "text": "China is launching its first lunar probe in early December, state-run Xinhua news agency reported Tuesday, just over a decade after the country first sent an astronaut into space.", + "length": 179 + }, + { + "text": "On landing, the spacecraft will release Jade Rabbit (called Yutu in Chinese) -- a six-wheeled lunar rover equipped with four cameras and two mechanical legs that can dig up soil samples, a designer for the rover told Xinhua earlier this month.", + "length": 243 + }, + { + "text": "Chang'e-3's descent is likely to create a noticeable plume on the moon's surface that could skew the results of research already being carried out by NASA's Lunar Atmosphere and Dust Environment Explorer (LADEE), Jeff Plescia, chair of NASA's Lunar Exploration Analysis Group told Space.", + "length": 287 + }, + { + "text": "The Chang'e-3 probe -- which will blast off from a Long March 3B rocket in Sichuan province located in southwest China -- is expected to land on the moon's surface in mid-December, a spokesman for the China's State Administration of Science, Technology and Industry for National Defence told Xinhua.", + "length": 299 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5707005858421326 + }, + 
"name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:07.346586648Z", + "first_section_created": "2025-12-23T09:34:07.34688256Z", + "last_section_published": "2025-12-23T09:34:07.347065867Z", + "all_results_received": "2025-12-23T09:34:07.407943119Z", + "output_generated": "2025-12-23T09:34:07.408093225Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:07.34688256Z", + "publish_time": "2025-12-23T09:34:07.347065867Z", + "first_worker_start": "2025-12-23T09:34:07.347595988Z", + "last_worker_end": "2025-12-23T09:34:07.407035Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:07.347579788Z", + "start_time": "2025-12-23T09:34:07.347650691Z", + "end_time": "2025-12-23T09:34:07.347686992Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:07.347815Z", + "start_time": "2025-12-23T09:34:07.347952Z", + "end_time": "2025-12-23T09:34:07.407035Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:07.347568987Z", + "start_time": "2025-12-23T09:34:07.347616489Z", + "end_time": "2025-12-23T09:34:07.347682192Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:07.347526086Z", + "start_time": "2025-12-23T09:34:07.347595988Z", + "end_time": "2025-12-23T09:34:07.347615589Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2161, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/003c4d5148a6081f329a18755755f37f5eacc001.json b/data/output/003c4d5148a6081f329a18755755f37f5eacc001.json new file mode 100644 index 0000000..ce4d908 --- /dev/null +++ b/data/output/003c4d5148a6081f329a18755755f37f5eacc001.json @@ -0,0 +1,290 @@ +{ + "file_name": "003c4d5148a6081f329a18755755f37f5eacc001.txt", + "total_words": 506, + "top_n_words": [ + { + "word": "the", + "count": 38 + }, + { + "word": "in", + "count": 24 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "of", + "count": 19 + }, + { + "word": "livingston", + "count": 12 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "was", + "count": 8 + }, + { + "word": "villalobos", + "count": 7 + }, + { + "word": "after", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "Amit Livingston.", + "length": 16 + }, + { + "text": "reporting to prison.", + "length": 20 + }, + { + "text": 
"Saenz said in a statement.", + "length": 26 + }, + { + "text": "'With the convictions that followed in .", + "length": 40 + }, + { + "text": "'The mishandling of the Livingston case .", + "length": 41 + }, + { + "text": "Limas, and the district attorney at the time, .", + "length": 47 + }, + { + "text": "' Villalobos was sentenced to 13 years in prison.", + "length": 49 + }, + { + "text": "He disappeared after the judge, Abel Limas, took the .", + "length": 54 + }, + { + "text": "federal court, the citizens of Cameron County were ready to turn the .", + "length": 70 + }, + { + "text": "An agreement was reached to use that bond money to settle the lawsuit.", + "length": 70 + }, + { + "text": "Armando Villalobos, were both later convicted in a bribery conspiracy.", + "length": 70 + }, + { + "text": "Limas, who pleaded guilty to racketeering, received six years in prison.", + "length": 72 + }, + { + "text": "page and move forward, but something was still missing: the capture of .", + "length": 72 + }, + { + "text": "corruption scandal suffered by Cameron County,' District Attorney Luis .", + "length": 72 + }, + { + "text": "and flight from justice were the cornerstone and the centerpiece of the .", + "length": 73 + }, + { + "text": "unusual step of allowing Livingston time to get his affairs in order before .", + "length": 77 + }, + { + "text": "Manhunt: Amit Livingston was sentenced to 23 years in prison for murder but fled the U.", + "length": 87 + }, + { + "text": "It is not clear what led authorities to Livingston, who has evaded capture for seven years.", + "length": 91 + }, + { + "text": "Attacked: The body of mother-of-three Hermila Hernandez was dumped on a beach after the shooting .", + "length": 98 + }, + { + "text": "Closure: Hermila Garcia, pictured holding a photo of her daughter, says the family feel robbed of justice .", + "length": 107 + }, + { + "text": "' The scheme that allowed Livingston to escape featured prominently in the federal corruption 
trial of Villalobos.", + "length": 114 + }, + { + "text": "After shooting the 31-year-old, who had three children, Livingston dumped her body on a beach at South Padre Island.", + "length": 116 + }, + { + "text": "The lawyer representing the children took $200,000 of it in fees, passing $80,000 to Villalobos and $10,000 to Limas.", + "length": 117 + }, + { + "text": "Caught: Amit Livingston, who went on the run after being found guilty of murder in Texas in 2007, has been arrested in India .", + "length": 126 + }, + { + "text": "Victim: Hermila Hernandez was shot in the back of the head in 2005 after she told Livingston she wanted to end their relationship .", + "length": 131 + }, + { + "text": "A Texas man who went on the run in 2007 after being sentenced to 23 years in prison for murdering his girlfriend, has been caught in India.", + "length": 139 + }, + { + "text": "Livingston, who is now 46, was caught in the Indian city of Hyderabad on Tuesday, and is being held while authorities wait to extradite him.", + "length": 140 + }, + { + "text": "Villalobos had arranged to have a friend and former law partner represent the interests of Ms Hernandez's three children in a related lawsuit.", + "length": 142 + }, + { + "text": "By agreeing to convict and sentence Livingston on the same day, Limas freed up the $500,000 bond that had been posted for his release before trial.", + "length": 147 + }, + { + "text": "At Villalobos' sentencing in February, Hermila Garcia, the victim's mother, told the judge: '[Villalobos] robbed us of justice to sell the murderer his freedom.", + "length": 160 + }, + { + "text": "Amit Livingston was found guilty of killing Hermila Hernandez in 2005, but when a judge allowed him time to get his affairs in order before starting his sentence, the killer fled.", + "length": 179 + }, + { + "text": "He had been found guilty of shooting Ms Hernandez in the back of the head in September 2005, when she told him she wanted to end their relationship, the 
Cameron County district attorney's office said.", + "length": 200 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7099343538284302 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:07.847828938Z", + "first_section_created": "2025-12-23T09:34:07.848218054Z", + "last_section_published": "2025-12-23T09:34:07.848426362Z", + "all_results_received": "2025-12-23T09:34:07.910338256Z", + "output_generated": "2025-12-23T09:34:07.910489162Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:07.848218054Z", + "publish_time": "2025-12-23T09:34:07.848426362Z", + "first_worker_start": "2025-12-23T09:34:07.849055187Z", + "last_worker_end": "2025-12-23T09:34:07.909486Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:07.849029486Z", + "start_time": "2025-12-23T09:34:07.849103789Z", + "end_time": "2025-12-23T09:34:07.849192693Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:07.849253Z", + "start_time": "2025-12-23T09:34:07.849414Z", + "end_time": "2025-12-23T09:34:07.909486Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:07.849005785Z", + "start_time": "2025-12-23T09:34:07.849056187Z", + "end_time": "2025-12-23T09:34:07.84912369Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:07.848990985Z", + "start_time": "2025-12-23T09:34:07.849055187Z", + "end_time": "2025-12-23T09:34:07.849082789Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + 
"sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2994, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/003c553081b17023a17e836376199e87d1915198.json b/data/output/003c553081b17023a17e836376199e87d1915198.json new file mode 100644 index 0000000..5009c9c --- /dev/null +++ b/data/output/003c553081b17023a17e836376199e87d1915198.json @@ -0,0 +1,456 @@ +{ + "file_name": "003c553081b17023a17e836376199e87d1915198.txt", + "total_words": 960, + "top_n_words": [ + { + "word": "the", + "count": 50 + }, + { + "word": "her", + "count": 30 + }, + { + "word": "a", + "count": 27 + }, + { + "word": "was", + "count": 27 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "of", + "count": 24 + }, + { + "word": "to", + "count": 24 + }, + { + "word": "kami", + "count": 20 + }, + { + "word": "in", + "count": 19 + }, + { + "word": "she", + "count": 18 + } + ], + "sorted_sentences": [ + { + "text": 
"eight-month-old son.", + "length": 20 + }, + { + "text": "when she went to bed.", + "length": 21 + }, + { + "text": ", resident Kami Ring .", + "length": 22 + }, + { + "text": "'She was the sweetest.", + "length": 22 + }, + { + "text": "SCROLL DOWN FOR VIDEO .", + "length": 23 + }, + { + "text": "I just don't understand.", + "length": 24 + }, + { + "text": "Murder charge: Richard E.", + "length": 25 + }, + { + "text": "but was released last fall.", + "length": 27 + }, + { + "text": "She was amazing in every way.", + "length": 29 + }, + { + "text": "'She was a model 10-year old.", + "length": 29 + }, + { + "text": "Bill McFarland told Cecil Daily.", + "length": 32 + }, + { + "text": "According to court records, Madden .", + "length": 36 + }, + { + "text": "'She was one of a kind,' Dotson said.", + "length": 37 + }, + { + "text": "He added that Kami was a straight-A .", + "length": 37 + }, + { + "text": "No other family members were with her.", + "length": 38 + }, + { + "text": "In 1994, while living in Oregon, Kami's .", + "length": 41 + }, + { + "text": "student at Charlestown Elementary School.", + "length": 41 + }, + { + "text": "The best kid you could ever have,' he said.", + "length": 43 + }, + { + "text": "Her brother and sister were everything to her.", + "length": 46 + }, + { + "text": "The little girl was pronounced dead at the scene.", + "length": 49 + }, + { + "text": "The Clark couple's son, Madden, shared their home.", + "length": 50 + }, + { + "text": "Biological evidence has allegedly linked Richard E.", + "length": 51 + }, + { + "text": "The family members say she was last seen around 10pm.", + "length": 53 + }, + { + "text": "Mystery surrounds what happened to her on Sunday night.", + "length": 55 + }, + { + "text": "Police K-9 Unit investigate the disappearance of Kami Ring .", + "length": 60 + }, + { + "text": "It is the second time a child has been murdered in the family.", + "length": 62 + }, + { + "text": "Sorry we are not currently accepting 
comments on this article.", + "length": 62 + }, + { + "text": "mother Ring was sentenced to two years in prison over criminal .", + "length": 64 + }, + { + "text": "Madden has been charged over the death of schoolgirl Kami Ring .", + "length": 64 + }, + { + "text": "A candlelight vigil was held for her in the town on Tuesday night.", + "length": 66 + }, + { + "text": "mistreatment and criminally negligent homicide in the death of her .", + "length": 68 + }, + { + "text": "pleaded guilty to theft in 2009 and was sentenced to five years' jail .", + "length": 71 + }, + { + "text": "Kami's mother Sacha, was said to be relieved that Madden had been charged.", + "length": 74 + }, + { + "text": "'They went to check on her and she was missing,' Maryland State Police Sgt.", + "length": 75 + }, + { + "text": "'We have taught her to scream at the top of her lungs if something ever happened.", + "length": 81 + }, + { + "text": "Madden, 29, to the death of Kami Ring, who died of blunt force trauma and asphyxia.", + "length": 83 + }, + { + "text": "'She packed up and took her clothes and that’s the last we saw her,' Mr Dean said.", + "length": 84 + }, + { + "text": "Mystery: Authorities are still investigating the death of 10-year-old Charlestown, Md.", + "length": 86 + }, + { + "text": "Earlier in the week, her devastated stepfather called the little girl's death 'an inside job.", + "length": 93 + }, + { + "text": "' The shocking discovery of the little girl's body on Monday rocked the Port Deposit community.", + "length": 95 + }, + { + "text": "' 'With all the information that I've gotten it has to be an inside job,' Ricky Dean told WBAL-TV.", + "length": 98 + }, + { + "text": "Tragic: Kami Ring, pictured, was found dead on Monday and on Tuesday night her uncle was arrested .", + "length": 99 + }, + { + "text": "' Model student: Kami, pictured left and right, was a straight A student at her Charlestown school .", + "length": 100 + }, + { + "text": "Sad: Kami Ring, pictured 
left and right, was found dead 60 yards in front of her grandparents' home .", + "length": 101 + }, + { + "text": "He said the couple had been asked to identify photographs of shoes and pants belonging to their daughter.", + "length": 105 + }, + { + "text": "Police: A Maryland Natural Resources Police vehicle stationed where investigators searched for Kami Ring .", + "length": 106 + }, + { + "text": "Her husband, Richard Dean, said: 'For the most part, we have closure that we know we have the right person.", + "length": 107 + }, + { + "text": "Tragic: Kami had been to a carnival on Sunday before staying with a couple she considered her grandparents .", + "length": 108 + }, + { + "text": "State Troopers, Maryland Police search dogs and Chesapeake Search and Rescue were involved in the investigation.", + "length": 112 + }, + { + "text": "The 'uncle' of a 10-year-old girl whose body was found in a field on Monday, has been charged with her murder and rape.", + "length": 119 + }, + { + "text": "After a seven-hour search, her lifeless body was discovered in a field near a shed just 60 yards in front of their home.", + "length": 120 + }, + { + "text": "Kami disappeared from her bedroom in the middle of the night and the Clarks reported her missing at around 11am on Monday.", + "length": 122 + }, + { + "text": "According to WBAL a window of the bedroom where she was staying was about 7-inches open and the screen had been pushed out.", + "length": 123 + }, + { + "text": "She added that Kami was fiercely protective of her half-brother, Joe, a third-grader, and half-sister, Lizzy, a second-grader.", + "length": 126 + }, + { + "text": "Inside job: Kami's stepfather, Ricky Dean, pictured right, said he believed the girl was killed by someone she knew and trusted .", + "length": 129 + }, + { + "text": "Crime scene: Kami's grandparents said they last saw her alive when she went to bed on Sunday night at their Port Deposit home, pictured .", + "length": 137 + }, + { + "text": "' 
After going to a carnival on Sunday, Kami had aasked her parents if she could spend the week at Clark’s house, the Washington Post said.", + "length": 140 + }, + { + "text": "Kami, who lived in Charlestown, Maryland, with her mother, stepfather and two younger siblings, was staying at her so-called grandparents' home for the weekend.", + "length": 160 + }, + { + "text": "Madden, whose most recent job was at Denny's restaurant in Perryville, is being held on a retake warrant related to conditions tied to his 2012 parole, according to ABC News.", + "length": 174 + }, + { + "text": "Tributes flooded in for the Charlestown Elementary School student on various social networking sites and members of the local community expressed their sadness and shock at the discovery of her body.", + "length": 199 + }, + { + "text": "Brittany Dotson, who got to know Kami at Cecil County Girls and Boys club where she attended afterschool care each afternoon, broke down as she described the 10-year-old who loved to laugh, sing and dance.", + "length": 205 + }, + { + "text": "The body of the straight A student was found on Monday evening, in an overgrown field across the street from a Port Deposit home where she had been staying with a couple whom she considered her grandparents.", + "length": 207 + }, + { + "text": "Police set up a command center with the Cecil County Department of Emergency Services at the nearby Pleasant View Baptist Church and conducted an exhaustive search of the area near the home, Cecil Daily reported.", + "length": 212 + }, + { + "text": "Madden, who has a string of past arrests, was charged after officers found the substances during a search of the Port Deposit home he shared with his parents - two people Kami had for years considered her grandparents though they were not related to her.", + "length": 254 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7575218677520752 + }, + "name_replaced": false, + "performance": { + "file_detection_time": 
"2025-12-23T09:34:08.349642251Z", + "first_section_created": "2025-12-23T09:34:08.351225815Z", + "last_section_published": "2025-12-23T09:34:08.351529327Z", + "all_results_received": "2025-12-23T09:34:08.434171156Z", + "output_generated": "2025-12-23T09:34:08.434396265Z", + "total_processing_time_ms": 84, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 82, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:08.351225815Z", + "publish_time": "2025-12-23T09:34:08.351433123Z", + "first_worker_start": "2025-12-23T09:34:08.35233166Z", + "last_worker_end": "2025-12-23T09:34:08.433319Z", + "total_journey_time_ms": 82, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:08.352436764Z", + "start_time": "2025-12-23T09:34:08.352511567Z", + "end_time": "2025-12-23T09:34:08.352615971Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:08.352562Z", + "start_time": "2025-12-23T09:34:08.352749Z", + "end_time": "2025-12-23T09:34:08.433319Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 80 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:08.352290158Z", + "start_time": "2025-12-23T09:34:08.352363261Z", + "end_time": "2025-12-23T09:34:08.352458565Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:08.352259257Z", + "start_time": "2025-12-23T09:34:08.35233166Z", + "end_time": "2025-12-23T09:34:08.352428264Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:08.351467725Z", + "publish_time": "2025-12-23T09:34:08.351529327Z", + "first_worker_start": "2025-12-23T09:34:08.35233106Z", + "last_worker_end": "2025-12-23T09:34:08.423576Z", + "total_journey_time_ms": 
72, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:08.352432464Z", + "start_time": "2025-12-23T09:34:08.352608571Z", + "end_time": "2025-12-23T09:34:08.352620571Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:08.35261Z", + "start_time": "2025-12-23T09:34:08.352724Z", + "end_time": "2025-12-23T09:34:08.423576Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 70 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:08.352432664Z", + "start_time": "2025-12-23T09:34:08.352601571Z", + "end_time": "2025-12-23T09:34:08.352623571Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:08.352296058Z", + "start_time": "2025-12-23T09:34:08.35233106Z", + "end_time": "2025-12-23T09:34:08.35233796Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 150, + "min_processing_ms": 70, + "max_processing_ms": 80, + "avg_processing_ms": 75, + "median_processing_ms": 80, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + 
"max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2746, + "slowest_section_id": 0, + "slowest_section_time_ms": 82 + } +} diff --git a/data/output/003c8ed562e88291fe6db13c7cf0e67d6366cda5.json b/data/output/003c8ed562e88291fe6db13c7cf0e67d6366cda5.json new file mode 100644 index 0000000..5a48360 --- /dev/null +++ b/data/output/003c8ed562e88291fe6db13c7cf0e67d6366cda5.json @@ -0,0 +1,314 @@ +{ + "file_name": "003c8ed562e88291fe6db13c7cf0e67d6366cda5.txt", + "total_words": 829, + "top_n_words": [ + { + "word": "the", + "count": 51 + }, + { + "word": "in", + "count": 33 + }, + { + "word": "a", + "count": 28 + }, + { + "word": "was", + "count": 20 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "police", + "count": 15 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "after", + "count": 11 + }, + { + "word": "for", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Investigators survey the scene at Place du President Wilson in Dijon.", + "length": 69 + }, + { + "text": "Among the injured was an 11-year-old child, according to local media.", + "length": 69 + }, + { + "text": "‘According to our information he was acting alone,’ said Mr Brandet.", + "length": 72 + }, + { + "text": "The 40-year-old, known to police in the area, was arrested at the scene .", + "length": 73 + }, + { + "text": "'Many are suffering with very serious head injuries,’ the source added.", + "length": 73 + }, + { + "text": "Three people, including the attacker, died in an assault by special forces.", + "length": 75 + }, + { + "text": "His brother was being watched, however, and had considered going to fight in Syria.", + "length": 83 + }, + { + "text": "‘He was arrested in the centre of the city, and was immediately placed in 
custody.", + "length": 84 + }, + { + "text": "Dijon police arrested the 40-year-old man shortly after arriving at the scene this evening .", + "length": 92 + }, + { + "text": "The 40-year-old was arrested by police at the scene and may face terrorism charges, authorities said.", + "length": 101 + }, + { + "text": "Roads surrounding the square in the centre of Dijon remained closed for hours as police investigated .", + "length": 102 + }, + { + "text": "French authorities said the man, a known psychiatric patient in the area, may face terrorism charges .", + "length": 102 + }, + { + "text": "A visibly distressed woman is accompanied across the road by a police officer following the incident .", + "length": 102 + }, + { + "text": "The square in Dijon (pictured was cordoned off by police following the incident at around 8pm on Sunday .", + "length": 105 + }, + { + "text": "Nzohabonayo, who had posted an IS flag on his Facebook account, was a French national born in Burundi in 1994.", + "length": 110 + }, + { + "text": "Thirteen people were injured in Dijon when a psychiatric patient ploughed his car into a crowd of pedestrians.", + "length": 110 + }, + { + "text": "Nzohabonayo had previously committed petty criminal offences but was not on a domestic intelligence watch-list.", + "length": 111 + }, + { + "text": "Authorities in France believe around 1,200 French nationals are involved in jihadist networks in Iraq and Syria.", + "length": 112 + }, + { + "text": "’ Local reports claim the man is thought to have been drunk when he launched the attack on the group of pedestrians .", + "length": 119 + }, + { + "text": "’ He said the man, who has not yet been identified, ‘may face terrorist charges, but that will be a decision for the justice system.", + "length": 136 + }, + { + "text": "The driver was thought to be heading for a police station in Dijon, but only got as far as surrounding streets before his car was stopped.", + "length": 138 + }, + { + "text": "The attack 
comes days after Bertrand Nzohabonayo was shot dead after entering a French police station with a knife and injuring two officers .", + "length": 143 + }, + { + "text": "Interior Minister Bernard Cazeneuve said security had been 'stepped up' for police officers across France in the wake of the attack last week .", + "length": 144 + }, + { + "text": "Thirteen people have been injured after a known psychiatric patient drove into pedestrians shouting 'God is greatest' in Arabic in a French city.", + "length": 145 + }, + { + "text": "The man is known to police, and may have carried out a copycat attack based on terrorist related incidents in his home country, and in Australia.", + "length": 145 + }, + { + "text": "Speaking of the attack in Dijon last night, a police source said the man used a Renault Clio to run over 'as many people as he could' before being stopped.", + "length": 155 + }, + { + "text": "Pictures showed the injured lying in the road around Place du Wilson as emergency vehicles and anti-terrorism officials arrived to take them to local hospitals.", + "length": 160 + }, + { + "text": "’ Interior Minister Bernard Cazeneuve visited the scene of the attack on Saturday, saying he had ordered ‘security measures to be stepped up’ for all officers.", + "length": 165 + }, + { + "text": "Last week in Sydney, Australia, Man Haron Monis , an Iranian-born Islamist with a history of extremism and violence entered a cafe and held people hostage for 16 hours.", + "length": 168 + }, + { + "text": "Two of those hit by the driver, who was believed to be drunk, were in a ‘very serious’ condition in the city of Dijon after being struck shortly after 8pm on Sunday night.", + "length": 175 + }, + { + "text": "Governments around the world have been braced for further ‘lone wolf’ attacks by individuals who show their support for IS, which now controls huge parts of Syria and Iraq.", + "length": 176 + }, + { + "text": "The main suspect in the murders of four people at Brussels’ 
Jewish Museum in May, is Mehdi Nemmouche, a Frenchman who spent more than a year fighting with extremists in Syria.", + "length": 177 + }, + { + "text": "Interior Ministry spokesman Pierre-Henry Brandet confirmed that the man was a ‘psychiatric patient’ and used expressions including ‘Allahu Akbar’, or ‘God is Great’.", + "length": 177 + }, + { + "text": "He was also heard to say ‘For the Children of Palestine’, referring to deaths of more than 500 youngsters during the Israeli military campaign in the Gaza Strip over the summer.", + "length": 181 + }, + { + "text": "The incident comes days after a man was shot dead after entering a police station armed with a knife in another French town, slashing two officers while proclaiming his love of Islam.", + "length": 183 + }, + { + "text": "It follows security being stepped up at police and fire stations across France after a knife-wielding French convert to Islam was shot dead after attacking three police officers on Saturday.", + "length": 190 + }, + { + "text": "Bertrand Nzohabonayo was shot dead after entering a police station in the central town of Joue-les-Tours armed with a knife, seriously wounding two officers, including slashing one in the face.", + "length": 193 + }, + { + "text": "He was also heard shouting ‘Allahu Akbar’ during the assault, as local prosecutor Jean-Luc Beck said investigators would seek to determine whether ‘he acted alone or if he acted on orders.", + "length": 194 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8683660626411438 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:08.852313799Z", + "first_section_created": "2025-12-23T09:34:08.854188075Z", + "last_section_published": "2025-12-23T09:34:08.854396683Z", + "all_results_received": "2025-12-23T09:34:08.923047648Z", + "output_generated": "2025-12-23T09:34:08.923246156Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 68, + 
"aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:08.854188075Z", + "publish_time": "2025-12-23T09:34:08.854396683Z", + "first_worker_start": "2025-12-23T09:34:08.854960406Z", + "last_worker_end": "2025-12-23T09:34:08.922064Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:08.855037109Z", + "start_time": "2025-12-23T09:34:08.855134513Z", + "end_time": "2025-12-23T09:34:08.855226617Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:08.85517Z", + "start_time": "2025-12-23T09:34:08.855322Z", + "end_time": "2025-12-23T09:34:08.922064Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:08.854999207Z", + "start_time": "2025-12-23T09:34:08.85507181Z", + "end_time": "2025-12-23T09:34:08.855172714Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:08.854879103Z", + "start_time": "2025-12-23T09:34:08.854960406Z", + "end_time": "2025-12-23T09:34:08.855004308Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4967, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/003c94bac43e18ec396de51784521abb09513026.json b/data/output/003c94bac43e18ec396de51784521abb09513026.json new file mode 100644 index 0000000..84a3de3 --- /dev/null +++ b/data/output/003c94bac43e18ec396de51784521abb09513026.json @@ -0,0 +1,378 @@ +{ + "file_name": "003c94bac43e18ec396de51784521abb09513026.txt", + "total_words": 781, + "top_n_words": [ + { + "word": "the", + "count": 54 + }, + { + "word": "to", + "count": 31 + }, + { + "word": "of", + "count": 27 + }, + { + "word": "and", + "count": 21 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "s", + "count": 12 + }, + { + "word": "said", + "count": 12 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "cyber", + "count": 10 + }, + { + "word": "is", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Attacks on U.", + "length": 13 + }, + { + 
"text": "Marshall Institute.", + "length": 19 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "’ Hackers: Experts say U.", + "length": 27 + }, + { + "text": "08:32 EST, 16 November 2011 .", + "length": 29 + }, + { + "text": "government's lack of response.", + "length": 30 + }, + { + "text": "Defence chiefs have warned that the U.", + "length": 38 + }, + { + "text": "‘Nothing is being done,’ Alperovitch said.", + "length": 46 + }, + { + "text": "companies lose billions through cyber attack every year .", + "length": 57 + }, + { + "text": "‘The fact that it is China, the fact that it is Russia.", + "length": 57 + }, + { + "text": "Cyberspace is a particularly challenging domain for the Pentagon.", + "length": 65 + }, + { + "text": "One defence company lost some 24,000 files in an intrusion in March.", + "length": 68 + }, + { + "text": "What are we going to do to face those countries and get them to stop?", + "length": 69 + }, + { + "text": "forces would be trained to conduct offensive and defensive operations.", + "length": 70 + }, + { + "text": "companies are losing billions of dollars to cyber theft each year, he said.", + "length": 75 + }, + { + "text": "is prepared to retaliate with military force if it came under cyber attack.", + "length": 75 + }, + { + "text": "‘Something has to be done from a policy perspective to address the threat ...", + "length": 79 + }, + { + "text": "Lani Kass, who recently retired as a senior policy adviser to the chairman of the U.", + "length": 84 + }, + { + "text": "The report, mandated by the 2011 Defence Authorisation Act, was made public yesterday.", + "length": 86 + }, + { + "text": "Hostile acts, it said, could include ‘significant cyber attacks directed against the U.", + "length": 89 + }, + { + "text": "computer networks have become more frequent and more damaging in recent years, costing U.", + "length": 89 + }, + { + "text": "Joint Chiefs of Staff, said enemies of the United States were 
becoming more savvy every day.", + "length": 92 + }, + { + "text": "companies an estimated $1 trillion in lost intellectual property, competitiveness and damage.", + "length": 93 + }, + { + "text": "The networks are probed millions of times a day and penetrations have caused the loss of thousands of files.", + "length": 108 + }, + { + "text": "economy, government or military’ and the response could use electronic means or more conventional military options.", + "length": 117 + }, + { + "text": "Defence: The paper was written for Congress by security chiefs at the Pentagon in response to growing problems with cyber attacks .", + "length": 131 + }, + { + "text": "Defence Department employees operate more than 15,000 computer networks with seven million computers at hundreds of locations around the world.", + "length": 143 + }, + { + "text": "’ ‘When warranted, we will respond to hostile attacks in cyberspace as we would to any other threat to our country,’ the 12-page report to Congress  noted.", + "length": 162 + }, + { + "text": "‘You have got to assume that what we do in cyberspace can be done to us quicker, cheaper and with fewer restrictions,\" she told Reuters after the Marshall Institute event.", + "length": 173 + }, + { + "text": "The report followed the release in mid-July of the Pentagon's cybersecurity policy, which designated cyberspace as an ‘operational domain’ like land, sea and air where U.", + "length": 174 + }, + { + "text": "’ Key to a military response is being able to quickly identify the source of an attack, particularly challenging due to the anonymous nature of the Internet, the report said.", + "length": 176 + }, + { + "text": "Private companies also face relentless cyber attacks, including an increasing number linked to countries like China and Russia, and they have grown increasingly frustrated about the U.", + "length": 184 + }, + { + "text": "Their vulnerability was highlighted by the case of Bradley Manning, who is accused of stealing 
hundreds of thousands of documents and passing them to the anti-secrecy website WikiLeaks.", + "length": 185 + }, + { + "text": "‘There is a massive amount of frustration on the part of the private sector,’ Dmitri Alperovitch, the former vice president of threat research at McAfee, told an event hosted by the George C.", + "length": 195 + }, + { + "text": "In the most explicit statement about cyber security to date, Pentagon officials said that they reserved the right to use ‘all necessary means to defend our allies, our partners and our interests.", + "length": 197 + }, + { + "text": "In an effort to crack that problem, the Pentagon is supporting research focusing on tracing the physical source of an attack and using behavior-based algorithms to assess the likely identity of an attacker, the report said.", + "length": 223 + }, + { + "text": "‘Should the “deny objectives” element of deterrence not prove adequate,’ the report said, ‘DoD (Department of Defence) maintains, and is further developing, the ability to respond militarily in cyberspace and in other domains.", + "length": 236 + }, + { + "text": "The report said the Defence Department was attempting to deter aggression in cyberspace by developing effective defences that prevent adversaries from achieving their objectives and by finding ways to make attackers pay a price for their actions.", + "length": 246 + }, + { + "text": "security agencies also are grooming a cadre of highly skilled cyber forensics experts and are working with international partners to share information in a timely manner about cyber threats, including malicious code and the people behind it, it said.", + "length": 250 + }, + { + "text": "‘If directed by the president, DoD will conduct offensive cyber operations in a manner consistent with the policy principles and legal regimes that the department follows for kinetic capabilities, including the law of armed conflict,’ the report said.", + "length": 255 + }, + { + "text": "Before moving 
to offensive action, the United States would exhaust all other options, weigh the risk of action against the cost of inaction and ‘act in a way that reflects our values and strengthens our legitimacy, seeking broad international support wherever possible,’ the report said.", + "length": 291 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5127381086349487 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:09.355173555Z", + "first_section_created": "2025-12-23T09:34:09.35555507Z", + "last_section_published": "2025-12-23T09:34:09.355851582Z", + "all_results_received": "2025-12-23T09:34:09.418545407Z", + "output_generated": "2025-12-23T09:34:09.418751316Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:09.35555507Z", + "publish_time": "2025-12-23T09:34:09.355851582Z", + "first_worker_start": "2025-12-23T09:34:09.356399204Z", + "last_worker_end": "2025-12-23T09:34:09.417578Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:09.356505708Z", + "start_time": "2025-12-23T09:34:09.356576411Z", + "end_time": "2025-12-23T09:34:09.356707016Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:09.356612Z", + "start_time": "2025-12-23T09:34:09.35677Z", + "end_time": "2025-12-23T09:34:09.417578Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:09.356451706Z", + "start_time": "2025-12-23T09:34:09.356534109Z", + "end_time": "2025-12-23T09:34:09.356632813Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": 
"2025-12-23T09:34:09.356325601Z", + "start_time": "2025-12-23T09:34:09.356399204Z", + "end_time": "2025-12-23T09:34:09.356446506Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4896, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/003c99210ffb08a21ff004000cac7e2efcfd48de.json b/data/output/003c99210ffb08a21ff004000cac7e2efcfd48de.json new file mode 100644 index 0000000..5e730bf --- /dev/null +++ b/data/output/003c99210ffb08a21ff004000cac7e2efcfd48de.json @@ -0,0 +1,378 @@ +{ + "file_name": "003c99210ffb08a21ff004000cac7e2efcfd48de.txt", + "total_words": 768, + "top_n_words": [ + { + "word": "the", + "count": 30 + }, + { + "word": "and", + "count": 26 + }, + { + "word": "to", + "count": 24 + }, + { + "word": "in", + "count": 20 + }, + { + "word": "for", + "count": 19 + }, + { + "word": "a", + 
"count": 17 + }, + { + "word": "blauser", + "count": 17 + }, + { + "word": "s", + "count": 14 + }, + { + "word": "wheelchairs", + "count": 14 + }, + { + "word": "children", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "\"There's no paycheck.", + "length": 21 + }, + { + "text": "' \" Blauser recalled.", + "length": 21 + }, + { + "text": "Members of the the U.", + "length": 21 + }, + { + "text": "\" Want to get involved?", + "length": 23 + }, + { + "text": "It's not really safe here.", + "length": 26 + }, + { + "text": "David Brown, a battalion surgeon.", + "length": 33 + }, + { + "text": "\"So I asked him, 'What do you need?", + "length": 35 + }, + { + "text": "Wheelchairs for Iraqi Kids was born.", + "length": 36 + }, + { + "text": "\"They are often not seen in society.", + "length": 36 + }, + { + "text": "And he's determined to see it through.", + "length": 38 + }, + { + "text": "Vote now for the CNN Hero of the Year .", + "length": 39 + }, + { + "text": "Some parents carry their children every day.", + "length": 44 + }, + { + "text": "But he has no intention of leaving anytime soon.", + "length": 48 + }, + { + "text": "His siblings giggle and sprinkle his face with kisses.", + "length": 54 + }, + { + "text": "But this is a once-in-a-lifetime opportunity,\" he said.", + "length": 55 + }, + { + "text": "Watch Blauser demonstrate the specialized wheelchair » .", + "length": 57 + }, + { + "text": "Check out Wheelchairs for Iraqi Kids and see how to help.", + "length": 57 + }, + { + "text": "Today, Ali smiles at home as he sits in his new wheelchair.", + "length": 59 + }, + { + "text": "\"Ali's handicap affected the family a lot,\" said his father.", + "length": 60 + }, + { + "text": "\"A number of families don't know what's wrong with their kid.", + "length": 61 + }, + { + "text": "Blauser first learned about this situation in 2005 through Maj.", + "length": 63 + }, + { + "text": "\"And he surprised me by his 
answer: 'I need children's wheelchairs.", + "length": 67 + }, + { + "text": "\" That's the case for 3-year-old Ali Khaled Ibrahim and his family.", + "length": 67 + }, + { + "text": "Illnesses such as Spina bifida, palsy and polio leave them unable to walk.", + "length": 74 + }, + { + "text": "\"The experience for me in the first distribution was awesome,\" said Blauser.", + "length": 76 + }, + { + "text": "He cannot speak and experiences increasingly frequent and violent convulsions.", + "length": 78 + }, + { + "text": "\"I thought maybe that will ease my work as a mother in the way I deal with my son.", + "length": 82 + }, + { + "text": "\" Watch Ali and other children receive their wheelchairs from Blauser's group » .", + "length": 83 + }, + { + "text": "\"They are not a curse, they are a blessing and they deserve to have their needs met.", + "length": 84 + }, + { + "text": "For these children and their families, limited access to health care has taken a toll.", + "length": 86 + }, + { + "text": "\"Disabled children -- they're really the forgotten ones in this war,\" said Blauser, 43.", + "length": 87 + }, + { + "text": "At 8 months old, Ali was struck by a mysterious fever that left him partially paralyzed.", + "length": 88 + }, + { + "text": "Since 2005, Wheelchairs for Iraqi Kids has distributed nearly 650 pediatric wheelchairs.", + "length": 88 + }, + { + "text": "The boy is among hundreds of disabled Iraqi children to benefit from Blauser's generosity.", + "length": 90 + }, + { + "text": "His mother said she couldn't carry out her daily chores and her \"psychological state worsened.", + "length": 94 + }, + { + "text": "and Iraqi armies, Iraqi police and border patrol work together to carry out the distributions.", + "length": 94 + }, + { + "text": "An estimated one in seven Iraqi children ages 2 to 14 lives with a disability, according to UNICEF.", + "length": 99 + }, + { + "text": "' \" Blauser began researching and campaigning for help from friends and 
family in the United States.", + "length": 100 + }, + { + "text": "Since 2005, Brad Blauser's Wheelchairs for Iraqi Kids program has distributed nearly 650 free wheelchairs.", + "length": 106 + }, + { + "text": "The toddler's parents are thankful for the relief it has brought not only to Ali, but their entire family.", + "length": 106 + }, + { + "text": "Blauser and his group help adjust the children into their wheelchairs, which fit their bodies as they grow.", + "length": 107 + }, + { + "text": "In 30 days, 31 pediatric and small adult wheelchairs arrived in Mosul for distribution to children in need.", + "length": 107 + }, + { + "text": "\" \"When I heard the news of the distribution of these advanced wheelchairs, I was very happy deep down,\" she said.", + "length": 114 + }, + { + "text": "There's not a doctor available for help [and] there's no pediatric wheelchair source in this country,\" Blauser said.", + "length": 116 + }, + { + "text": "\"To see the smile come across their face and [to] look over at the mothers and fathers -- they've definitely been changed.", + "length": 122 + }, + { + "text": "Through sponsor donations, his group purchases the chairs from ROC Wheels for about $200 apiece, and USAID donates shipping.", + "length": 124 + }, + { + "text": "\"By providing what they need, I'm hoping to start a movement to change the way people think about disabled children,\" said Blauser.", + "length": 131 + }, + { + "text": "BAGHDAD, Iraq (CNN) -- Brad Blauser lives in war-torn Baghdad, where he doesn't earn a paycheck and is thousands of miles from his family.", + "length": 138 + }, + { + "text": "To obtain the specialized chairs, Blauser partnered with Reach Out and Care Wheels, a nonprofit pediatric wheelchair organization in Montana.", + "length": 141 + }, + { + "text": "\" Blauser arrived in Iraq as a civilian contractor in 2004, but quit that job last year to devote himself full time to his program, without compensation.", + "length": 153 + }, + { + 
"text": "The organization provides wheelchairs designed for rough terrains in developing nations, making the devices \"perfect for this environment,\" said Blauser.", + "length": 153 + }, + { + "text": "His friend shared heartbreaking accounts of helpless children pulling themselves along the ground, or living motionless in back rooms, too big to be moved long distances very often.", + "length": 181 + }, + { + "text": "For the past four years, the Dallas, Texas, native has been providing hope to hundreds of disabled Iraqi children and their families through the distribution of pediatric wheelchairs.", + "length": 183 + }, + { + "text": "For Blauser, who provides part-time safety consulting in exchange for room and board, an initial plan to stay for one year has become a dream to get wheelchairs to every Iraqi child who wants one.", + "length": 196 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4598071277141571 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:09.856642554Z", + "first_section_created": "2025-12-23T09:34:09.858322322Z", + "last_section_published": "2025-12-23T09:34:09.858553731Z", + "all_results_received": "2025-12-23T09:34:09.923678054Z", + "output_generated": "2025-12-23T09:34:09.92382696Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:09.858322322Z", + "publish_time": "2025-12-23T09:34:09.858553731Z", + "first_worker_start": "2025-12-23T09:34:09.85902815Z", + "last_worker_end": "2025-12-23T09:34:09.922773Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:09.859136955Z", + "start_time": "2025-12-23T09:34:09.859225758Z", + "end_time": "2025-12-23T09:34:09.859335263Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + 
"sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:09.859406Z", + "start_time": "2025-12-23T09:34:09.859605Z", + "end_time": "2025-12-23T09:34:09.922773Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:09.859122654Z", + "start_time": "2025-12-23T09:34:09.859194557Z", + "end_time": "2025-12-23T09:34:09.859350063Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:09.858963248Z", + "start_time": "2025-12-23T09:34:09.85902815Z", + "end_time": "2025-12-23T09:34:09.859076952Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4551, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git 
a/data/output/003cbb1eae84f072b309ee89b8ba25d45bfebdec.json b/data/output/003cbb1eae84f072b309ee89b8ba25d45bfebdec.json new file mode 100644 index 0000000..57e7bb5 --- /dev/null +++ b/data/output/003cbb1eae84f072b309ee89b8ba25d45bfebdec.json @@ -0,0 +1,338 @@ +{ + "file_name": "003cbb1eae84f072b309ee89b8ba25d45bfebdec.txt", + "total_words": 708, + "top_n_words": [ + { + "word": "a", + "count": 21 + }, + { + "word": "the", + "count": 21 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "with", + "count": 15 + }, + { + "word": "christmas", + "count": 13 + }, + { + "word": "is", + "count": 12 + }, + { + "word": "it", + "count": 9 + }, + { + "word": "his", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Mealworm Vegetable Gravy .", + "length": 26 + }, + { + "text": "Brussel Sprouts 'n' locusts .", + "length": 29 + }, + { + "text": "Mince Pies filled with locusts .", + "length": 32 + }, + { + "text": "Mealworm Christmas Pudding with Rum sauce .", + "length": 43 + }, + { + "text": "Cricket filled nut-roast with roast potatoes .", + "length": 46 + }, + { + "text": "'I've never liked turkey - it's dry, bland and boring.", + "length": 54 + }, + { + "text": "It's only a matter of time before everyone is eating them!", + "length": 58 + }, + { + "text": "He orders his creepy-crawlies from a live breeder in Somerset.", + "length": 62 + }, + { + "text": "Mr Bickerton covering his Christmas dinner with mealworm gravy.", + "length": 63 + }, + { + "text": "Homemade mince pies stuffed with locusts and decorated with crickets .", + "length": 70 + }, + { + "text": "'People waste a lot of food at Christmas but insects are very low waste.", + "length": 72 + }, + { + "text": "Crickets are naturally quite a nutty flavour so taste perfect in a roast.", + "length": 73 + }, + { + "text": "Dessert will be a choice of mealworm 
Christmas pudding or locust mince pies.", + "length": 76 + }, + { + "text": "It's a very similar pairing to bacon and cabbage - it's a genius combination.", + "length": 77 + }, + { + "text": "Swimming in worms: Mealworms also make an appearance in the mixed vegetable gravy .", + "length": 83 + }, + { + "text": "'A native tribe brought some three-inch-long, barbecued beetle larvae for us to try.", + "length": 84 + }, + { + "text": "These findings led Mr Bickerton to start incorporating bugs regularly into his diet.", + "length": 84 + }, + { + "text": "'The amount of water, grain and resources that you need to rear livestock is ridiculous.", + "length": 88 + }, + { + "text": "The traditional Christmas pudding gets a insect makeover and is studded with mealworms .", + "length": 88 + }, + { + "text": "'Locusts are low fat but still very juicy so that goes wonderfully with Brussels sprouts.", + "length": 89 + }, + { + "text": "'A cricket nut roast is a far more delectable and exciting alternative for your holiday lunch.", + "length": 94 + }, + { + "text": "' But it was only in 2011 that he started doing more research into insect eating - or entomophagy.", + "length": 98 + }, + { + "text": "'It was just like an incredible explosion of taste in my mouth, I had never eaten anything like it.", + "length": 99 + }, + { + "text": "'A Christmas pudding can be overly sweet sometimes but the mealworms add a crunch and texture to it.", + "length": 100 + }, + { + "text": "'If people ate less cow, sheep, pig and chicken and ate more bugs instead, everything would balance.", + "length": 100 + }, + { + "text": "Crunchy lunch: The main course is a nut roast filled with crickets and served with roasted potatoes .", + "length": 101 + }, + { + "text": "'Far more chefs are using insects in their cooking and there are bug kitchens opening up across Europe.", + "length": 103 + }, + { + "text": "Dotted with insects and oozing with worms, at first glance this meal looks like it is ready for 
the bin.", + "length": 104 + }, + { + "text": "Genius combination: Locusts  on the Brussels sprouts provide a crunch to the otherwise soggy offering .", + "length": 104 + }, + { + "text": "'I talked to the chef at my Norwich University about putting bugs on the menu and he seemed really interested.", + "length": 110 + }, + { + "text": "The 25-year-old from from Blackburn, Lancashire, said: 'I'm definitely going to be eating bugs this Christmas.", + "length": 110 + }, + { + "text": "'I discovered that insects have twice as much protein than beef and far more omega-3 fatty acids than in fish.", + "length": 110 + }, + { + "text": "'In fact, research has shown that 100g of locusts for example has more nutrients than any other meat,' he said.", + "length": 111 + }, + { + "text": "Mr Bickerton - who recently completed his PhD in Plant Studies - first ate insects on a field-trip to Ecuador in 2009.", + "length": 118 + }, + { + "text": "Peter Bickerton believes his insect-inspired meal is a much more sustainable alternative to conventional Christmas dinner .", + "length": 123 + }, + { + "text": "He explains: 'Once people get over the initial yuck factor of creepy-crawlies, they'll be pleasantly surprised by the taste.", + "length": 124 + }, + { + "text": "The 25-year-old Education \u0026 Public Engagement Officer at Norwich University first ate insects on a field trip in Ecuador in 2009 .", + "length": 130 + }, + { + "text": "But the creepy-crawly festive feast is the creation of Peter Bickerton, who will be tucking into his insect-filled meal on Christmas Day.", + "length": 137 + }, + { + "text": "He has now replaced all the meat dishes he ate with insect protein instead and is even hoping to get insects on his workplace canteen menu.", + "length": 139 + }, + { + "text": "Clockwise from left to right: Locust mince pies, mealworm gravy, cricket nut roast, locust Brussels sprouts and mealworm Christmas pudding.", + "length": 139 + }, + { + "text": "For the main course there 
is nut-roast filled with crickets served with Brussels sprouts and locusts, and covered in mealworm vegetable gravy.", + "length": 142 + }, + { + "text": "Mr Bickerton believes his insect-inspired menu is a much more sustainable, protein-rich and healthy alternative to the conventional Christmas dinner.", + "length": 149 + }, + { + "text": "' Peter - who works as an Education \u0026 Public Engagement Officer at Norwich University - claims that the tiny critters are delicious in a Christmas lunch.", + "length": 153 + }, + { + "text": "'If anyone wants to put insects on their plate this Christmas, the first thing to do is search entomophagy on Twitter and you'll find several companies that sell edible bugs.", + "length": 174 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.46568000316619873 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:10.359367804Z", + "first_section_created": "2025-12-23T09:34:10.359786521Z", + "last_section_published": "2025-12-23T09:34:10.360144535Z", + "all_results_received": "2025-12-23T09:34:10.429068312Z", + "output_generated": "2025-12-23T09:34:10.429235318Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:10.359786521Z", + "publish_time": "2025-12-23T09:34:10.360144535Z", + "first_worker_start": "2025-12-23T09:34:10.360552952Z", + "last_worker_end": "2025-12-23T09:34:10.428215Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:10.360581153Z", + "start_time": "2025-12-23T09:34:10.360906066Z", + "end_time": "2025-12-23T09:34:10.36124328Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:10.360859Z", + "start_time": 
"2025-12-23T09:34:10.361013Z", + "end_time": "2025-12-23T09:34:10.428215Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:10.360469949Z", + "start_time": "2025-12-23T09:34:10.360559752Z", + "end_time": "2025-12-23T09:34:10.360657356Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:10.360478549Z", + "start_time": "2025-12-23T09:34:10.360552952Z", + "end_time": "2025-12-23T09:34:10.360601554Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4235, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/003cca58f597860bcafec3089b615910be7b61d8.json b/data/output/003cca58f597860bcafec3089b615910be7b61d8.json new file mode 100644 
index 0000000..bba2027 --- /dev/null +++ b/data/output/003cca58f597860bcafec3089b615910be7b61d8.json @@ -0,0 +1,238 @@ +{ + "file_name": "003cca58f597860bcafec3089b615910be7b61d8.txt", + "total_words": 325, + "top_n_words": [ + { + "word": "the", + "count": 22 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "was", + "count": 7 + }, + { + "word": "in", + "count": 6 + }, + { + "word": "is", + "count": 6 + }, + { + "word": "on", + "count": 6 + }, + { + "word": "years", + "count": 6 + }, + { + "word": "been", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "release states.", + "length": 15 + }, + { + "text": "Meghan Keneally .", + "length": 17 + }, + { + "text": "His court date is scheduled for May 7.", + "length": 38 + }, + { + "text": "'The suspect was interviewed several .", + "length": 38 + }, + { + "text": "He is being held at Sumner County Jail on a $200,000 bond.", + "length": 58 + }, + { + "text": "make the arrest on Friday,' the Gallatin police department press .", + "length": 66 + }, + { + "text": "times during the investigation and sufficient information was learned to .", + "length": 74 + }, + { + "text": "She said that the alleged assaults took place when she was five and six years old.", + "length": 82 + }, + { + "text": "It is considered a Class A felony and must include sexual penetration of the victim.", + "length": 84 + }, + { + "text": "The victim has not been identified due to the nature of the crimes and she is now 14-years-old.", + "length": 95 + }, + { + "text": "In Tennessee, the legal definition of child rape mandates that the victim is between the age of 3- and 13-years-old.", + "length": 116 + }, + { + "text": "Giulliani, whose current wife is not the mother of the alleged victim, was arrested on Friday in Sumner County, Tennessee.", + "length": 122 + }, + { + "text": "Charged: Randy Giulliani, 54, was 
arrested on charges of raping a child under the age of 13 and aggravated sexual assault .", + "length": 123 + }, + { + "text": "Giulliani said that the had been a pastor for many years but has not been associated with the church for the past two years.", + "length": 124 + }, + { + "text": "Holding: Giulliani was arrested on Friday and remains in police custody in Gallatin, Tennessee (pictured) 'He calls himself a worship leader.", + "length": 141 + }, + { + "text": "A self-described pastor has been arrested and charged with child rape after his former step daughter came forward with information about prior assaults.", + "length": 152 + }, + { + "text": "I don't know if hes a pastor or preacher or if he just leads people in song or anything like that,' a Gallatin Police Department spokesman told MailOnline.", + "length": 155 + }, + { + "text": "According to WKRN-TV, the 54-year-old told the officers from the Gallatin Police Department that he was a worship leader at an unidentified church in Rutherford County.", + "length": 168 + }, + { + "text": "Randy Giulliani has been charged with aggravated sexual battery and the rape of a child after his former step daughter accused him of performing sexual acts on her eight to ten years ago.", + "length": 187 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7697223424911499 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:10.860982637Z", + "first_section_created": "2025-12-23T09:34:10.861247048Z", + "last_section_published": "2025-12-23T09:34:10.861436755Z", + "all_results_received": "2025-12-23T09:34:10.924598209Z", + "output_generated": "2025-12-23T09:34:10.924739814Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:10.861247048Z", + "publish_time": "2025-12-23T09:34:10.861436755Z", + 
"first_worker_start": "2025-12-23T09:34:10.86205668Z", + "last_worker_end": "2025-12-23T09:34:10.923624Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:10.862006278Z", + "start_time": "2025-12-23T09:34:10.862065181Z", + "end_time": "2025-12-23T09:34:10.862125183Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:10.862307Z", + "start_time": "2025-12-23T09:34:10.862456Z", + "end_time": "2025-12-23T09:34:10.923624Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:10.862022879Z", + "start_time": "2025-12-23T09:34:10.862092082Z", + "end_time": "2025-12-23T09:34:10.862167685Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:10.861981377Z", + "start_time": "2025-12-23T09:34:10.86205668Z", + "end_time": "2025-12-23T09:34:10.862075881Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + 
"wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1878, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/003cd8cef9761b4114a57bc60caf6c9b808d6742.json b/data/output/003cd8cef9761b4114a57bc60caf6c9b808d6742.json new file mode 100644 index 0000000..2b750f5 --- /dev/null +++ b/data/output/003cd8cef9761b4114a57bc60caf6c9b808d6742.json @@ -0,0 +1,468 @@ +{ + "file_name": "003cd8cef9761b4114a57bc60caf6c9b808d6742.txt", + "total_words": 1324, + "top_n_words": [ + { + "word": "the", + "count": 54 + }, + { + "word": "and", + "count": 36 + }, + { + "word": "in", + "count": 34 + }, + { + "word": "they", + "count": 30 + }, + { + "word": "to", + "count": 30 + }, + { + "word": "a", + "count": 23 + }, + { + "word": "of", + "count": 22 + }, + { + "word": "their", + "count": 20 + }, + { + "word": "have", + "count": 19 + }, + { + "word": "on", + "count": 17 + } + ], + "sorted_sentences": [ + { + "text": "like the missing link for Chelsea.", + "length": 34 + }, + { + "text": "Poor Yaya deserves time to recover .", + "length": 36 + }, + { + "text": "He has a bit of everything about him.", + "length": 37 + }, + { + "text": "But City are Premier League champions.", + "length": 38 + }, + { + "text": "Manchester City remain an enigma to me.", + "length": 39 + }, + { + "text": "Costa is the missing link for Chelsea .", + "length": 39 + }, + { + "text": "VIDEO We must support Toure - Pellegrini .", + "length": 42 + }, + { + "text": "Diego Costa is the real deal as a striker.", + "length": 42 + }, + { + "text": "On their day, they can be something special.", + "length": 44 + }, + { + "text": "It is something I’ve been through recently.", + "length": 45 + }, + { + "text": "He might not score 
as many but he will create more.", + "length": 51 + }, + { + "text": "It’s partly why Sunday's contest is so fascinating.", + "length": 53 + }, + { + "text": "His attributes look tailor-made for English football.", + "length": 53 + }, + { + "text": "But I don’t see them hitting those heights nearly enough.", + "length": 59 + }, + { + "text": "But behind every dip in form, there is always a back story.", + "length": 59 + }, + { + "text": "They don’t need to win on Sunday; a draw would do them fine.", + "length": 62 + }, + { + "text": "Fabregas looks to be the perfect replacement for Frank Lampard.", + "length": 63 + }, + { + "text": "Given what he has contributed to City, he’s earned that right.", + "length": 64 + }, + { + "text": "Too many can have sleepy days when they’re well below their best.", + "length": 67 + }, + { + "text": "I would guess he needs some time and space to get back to his best.", + "length": 67 + }, + { + "text": "They need to start dominating like champions and asserting themselves.", + "length": 70 + }, + { + "text": "And there are far too many of those days for a club of their aspirations.", + "length": 73 + }, + { + "text": "They’re not a team that will retreat into a solid shape and defend deep.", + "length": 74 + }, + { + "text": "Look at their results against Manchester United in the past three seasons.", + "length": 74 + }, + { + "text": "And in the majority of those encounters they have been absolutely brilliant.", + "length": 76 + }, + { + "text": "Chelsea have won every game this season in the Premier League, having played four .", + "length": 83 + }, + { + "text": "And if you happened to catch them on a good day, they would be 9/10 or close to 10/10.", + "length": 86 + }, + { + "text": "City suffered their third defeat of the season at Bayern Munich in the Champions League .", + "length": 89 + }, + { + "text": "The likes of Thierry Henry and Dennis Bergkamp would put in 8/10 performances every week .", + "length": 90 + }, + { + 
"text": "Manchester City have too many 'sleepy' players, and Chelsea are currently looking stronger .", + "length": 92 + }, + { + "text": "On days like that you’re left wondering whether they can challenge Real Madrid and Barcelona.", + "length": 95 + }, + { + "text": "In six Premier League games they have scored 18 goals against them and won five of those games.", + "length": 95 + }, + { + "text": "Yaya Toure wasn't at his best on Wednesday night as Manchester City lost 1-0 to Bayern Munich .", + "length": 95 + }, + { + "text": "And if they summon one of their better performances, then they can start that process on Sunday.", + "length": 96 + }, + { + "text": "They are on a completely different level and those performances have been close to 10/10 displays.", + "length": 98 + }, + { + "text": "In six Premier League games against Man United, City have scored 18 goals against them and won five .", + "length": 101 + }, + { + "text": "Fabregas has slotted seamlessly back into English football following his arrival back from Barcelona .", + "length": 102 + }, + { + "text": "Yaya was not at his best against Bayern Munich and he does look a little detached since the World Cup.", + "length": 102 + }, + { + "text": "If you catch City on an off day you can definitely takes points off them, as Stoke did the other week.", + "length": 102 + }, + { + "text": "Man City can dazzle, but they have also showed their weaknesses this season, losing to Stoke at home .", + "length": 102 + }, + { + "text": "They want to control games but I’m not sure they have midfielders who are consistent enough to do that.", + "length": 105 + }, + { + "text": "Costa scored a hat-trick against Swansea City last weekend and is already on seven goals in a blue shirt .", + "length": 106 + }, + { + "text": "They’re not a team that will defend deep and make it difficult for teams and then hit them on the break.", + "length": 106 + }, + { + "text": "We don’t know exactly what the reason is with Yaya but we 
do know his brother, Ibrahim, died in the summer.", + "length": 109 + }, + { + "text": "Jose Mourinho always said that this was the season on which we should judge him and, so far, that’s looking like a good call.", + "length": 127 + }, + { + "text": "The player who has received the most criticism for his midfield performances and his inconsistency this week has been Yaya Toure.", + "length": 129 + }, + { + "text": "At City, I think you could say that Sergio Aguero and Vincent Kompany meet that standard but not enough of their other key players.", + "length": 131 + }, + { + "text": "They seemed to drop off in their intensity the following season and just at the moment you might be seeing the start of something similar.", + "length": 138 + }, + { + "text": "The 31-year-old has been instrumental in the reshaping of Manchester City and been a powerhouse in midfield in their title-winning seasons.", + "length": 139 + }, + { + "text": "Maybe it’s because of the changes in managers over the last six years but their identity in terms of the playing style is an issue for me.", + "length": 140 + }, + { + "text": "But I don’t see them as a free-flowing team, as Manchester United were in their heyday; they don’t play with the freedom of wide wingers.", + "length": 141 + }, + { + "text": "I don’t know if they want to be an open team who will go out and play with flair and to win simply because we’ve got better players than you.", + "length": 145 + }, + { + "text": "They need to ensure that they don’t fall into the trap of one season on and one season off, as they did after finally winning the title in 2012.", + "length": 146 + }, + { + "text": "The pace of their game, the way they have moved the ball, the speed of their attacks and their strength at the back have been too much for United.", + "length": 146 + }, + { + "text": "On Sunday they face their main rivals for the title in Chelsea and they have been transformed as a club since Sheik Mansour took them over in 2008.", + 
"length": 147 + }, + { + "text": "Sometimes we simply analyse players as though they are footballing machines but the raw emotion of such a tragedy can’t fail to have an impact in some way.", + "length": 157 + }, + { + "text": "That said, the owners have invested £865million in the club in the six years in which they have owned it yet I still can’t quite grasp their true identity.", + "length": 158 + }, + { + "text": "City were outwitted in the equivalent fixture last season on another of those all-too-familiar off days, even if they got their revenge shortly after in the FA Cup.", + "length": 164 + }, + { + "text": "I would imagine he’ll play just behind Costa at Manchester City on Sunday, with Ramires and Nemanja Matic sitting in the deeper positions to add a bit more solidity.", + "length": 167 + }, + { + "text": "He is great at holding the ball up, has enough speed to get in behind defenders, can work the 18-yard box and is clearly not going to be intimidated by physical opponents.", + "length": 171 + }, + { + "text": "They — as Atletico before them — have worked out how to play in the way that suits him best, which Spain, with their short intricate passing, didn’t manage in the World Cup.", + "length": 179 + }, + { + "text": "Together with Cesc Fabregas, he has formed a wonderful understanding and Chelsea look a better side than last season — and that’s after having made a profit of transfers in the summer.", + "length": 188 + }, + { + "text": "Vincent Kompany picks up Yaya Toure after German champions Bayern Munich's late winner on Wednesday evening, as City left-back Gael Clichy fails to stop Arjen Robben passing him at the byline .", + "length": 193 + }, + { + "text": "It’s very early days but City’s start to the season has been unsure and stuttering, putting them under extra pressure on Sunday because they don’t want to see the five-point gap behind Chelsea increase.", + "length": 208 + }, + { + "text": "You might have thought it would take Chelsea three 
or four months for their new signings to settle in but Costa, Fabregas and Thibaut Courtois have adapted straight away, which is why I have them as title favourites.", + "length": 216 + }, + { + "text": "With the money that has been invested, I would imagine that the owners would want to see back-to-back Premier League titles or a Champions League semi-final or final this season but, right now, I would put Jose Mourinho’s team ahead of them.", + "length": 243 + }, + { + "text": "When I think about the great Premier League teams such as Manchester United and Arsenal in the 1990s and 2000s, players like Patrick Vieira, Emmanuel Petit, Thierry Henry, Dennis Bergkamp, Roy Keane, Paul Scholes and Cristiano Ronaldo would put in performances that were 8/10 every single week.", + "length": 294 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.44011034071445465 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:11.362551813Z", + "first_section_created": "2025-12-23T09:34:11.364225781Z", + "last_section_published": "2025-12-23T09:34:11.364621297Z", + "all_results_received": "2025-12-23T09:34:11.447899763Z", + "output_generated": "2025-12-23T09:34:11.448118872Z", + "total_processing_time_ms": 85, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 83, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:11.364225781Z", + "publish_time": "2025-12-23T09:34:11.364474691Z", + "first_worker_start": "2025-12-23T09:34:11.365134417Z", + "last_worker_end": "2025-12-23T09:34:11.439561Z", + "total_journey_time_ms": 75, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:11.365108816Z", + "start_time": "2025-12-23T09:34:11.365175119Z", + "end_time": "2025-12-23T09:34:11.365275823Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + 
"receive_time": "2025-12-23T09:34:11.365416Z", + "start_time": "2025-12-23T09:34:11.36556Z", + "end_time": "2025-12-23T09:34:11.439561Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 74 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:11.365168319Z", + "start_time": "2025-12-23T09:34:11.365228221Z", + "end_time": "2025-12-23T09:34:11.365342226Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:11.365068615Z", + "start_time": "2025-12-23T09:34:11.365134417Z", + "end_time": "2025-12-23T09:34:11.36519242Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:11.364511992Z", + "publish_time": "2025-12-23T09:34:11.364621297Z", + "first_worker_start": "2025-12-23T09:34:11.365119517Z", + "last_worker_end": "2025-12-23T09:34:11.447047Z", + "total_journey_time_ms": 82, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:11.365183119Z", + "start_time": "2025-12-23T09:34:11.365216721Z", + "end_time": "2025-12-23T09:34:11.365259722Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:11.365479Z", + "start_time": "2025-12-23T09:34:11.36565Z", + "end_time": "2025-12-23T09:34:11.447047Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 81 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:11.365071415Z", + "start_time": "2025-12-23T09:34:11.365119517Z", + "end_time": "2025-12-23T09:34:11.365177119Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:11.365090315Z", + "start_time": "2025-12-23T09:34:11.365145818Z", + "end_time": "2025-12-23T09:34:11.365168119Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + 
} + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 155, + "min_processing_ms": 74, + "max_processing_ms": 81, + "avg_processing_ms": 77, + "median_processing_ms": 81, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3626, + "slowest_section_id": 1, + "slowest_section_time_ms": 82 + } +} diff --git a/data/output/003cefcdcf96dbca76aa17d72ee612fa3b40651d.json b/data/output/003cefcdcf96dbca76aa17d72ee612fa3b40651d.json new file mode 100644 index 0000000..855db06 --- /dev/null +++ b/data/output/003cefcdcf96dbca76aa17d72ee612fa3b40651d.json @@ -0,0 +1,290 @@ +{ + "file_name": "003cefcdcf96dbca76aa17d72ee612fa3b40651d.txt", + "total_words": 716, + "top_n_words": [ + { + "word": "the", + "count": 29 + }, + { + "word": "to", + "count": 25 + }, + { + "word": "children", + "count": 17 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "said", + "count": 15 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "that", + "count": 11 + }, + { + "word": "s", + "count": 10 + } + ], + 
"sorted_sentences": [ + { + "text": "Pure and simple.", + "length": 16 + }, + { + "text": "| Impact Your World .", + "length": 21 + }, + { + "text": "List of missing, found | Are you there?", + "length": 39 + }, + { + "text": "CNN's Jessica Ravitz contributed to this report.", + "length": 48 + }, + { + "text": "Adoptions without consent are child trafficking.", + "length": 48 + }, + { + "text": "\" Full coverage | Latest news updates | Twitter updates .", + "length": 57 + }, + { + "text": "But that's not always the best immediate move, aid groups caution.", + "length": 66 + }, + { + "text": "The Red Cross also has set up a Web site to help people searching for relatives, he said.", + "length": 89 + }, + { + "text": "She has been taking care of children who waited for years in the orphanage to be adopted.", + "length": 89 + }, + { + "text": "Their family members may still be alive, she said, and \"will be desperate to be reunited with them.", + "length": 99 + }, + { + "text": "It's no surprise there has been a flood of well-intentioned people who want to adopt those children.", + "length": 100 + }, + { + "text": "This means knowing who is truly an orphan and who isn't requires great attention to detail and documents.", + "length": 105 + }, + { + "text": "Diana Boni, who works with Port-au-Prince's BRESMA orphanage, is firmly against new adoptions out of Haiti.", + "length": 107 + }, + { + "text": "(CNN) -- The images of Haitian children crying or injured or wandering the streets alone are heartbreaking.", + "length": 108 + }, + { + "text": "Allowing adoptions to proceed without thorough background checks can lead to child trafficking and other crimes.", + "length": 112 + }, + { + "text": "\"Under no circumstances should we evacuate any child newly orphaned or displaced,\" she wrote in an e-mail to CNN.", + "length": 113 + }, + { + "text": "Hurriedly whisking unclassified children out of Haiti will not ensure the children are happy or safe in the long-term, experts 
said.", + "length": 132 + }, + { + "text": "\"It's a bit sad, as I have several wonderful children who waited literally for years for new families, and no one ever came,\" she said.", + "length": 135 + }, + { + "text": "Homes and potential parents must be reviewed by professional social workers and it's logistically impossible to do that in a short time.", + "length": 136 + }, + { + "text": "Haitian children must first be fed, sheltered, clothed and given medical attention; the next step is to register them and trace their relatives.", + "length": 144 + }, + { + "text": "\"Removing children who've just experienced a disaster from their environment, from where they're from is not necessarily good for them,\" he said.", + "length": 145 + }, + { + "text": "For those who want to help Haitian children, Whitbread said, they should donate to aid agencies that are working on reuniting children with their families.", + "length": 155 + }, + { + "text": "\"We are concerned not only about premature overseas adoption but also about children increasingly being sent unaccompanied to the Dominican Republic,\" he said.", + "length": 159 + }, + { + "text": "\"Imagine losing much of your family, only to discover that a surviving relative had been whisked off to the States to be adopted by strangers without your knowledge or consent!", + "length": 176 + }, + { + "text": "Save the Children Chief Executive Jasmine Whitbread said the \"vast majority\" of children on their own in Haiti are not orphans, but were simply separated from their families in the chaos.", + "length": 187 + }, + { + "text": "Because Haiti's poverty already made it \"extremely vulnerable\" to exploitation and abuse, rushed adoptions could open the door to traffickers, said World Vision Chief Executive Justin Byworth.", + "length": 192 + }, + { + "text": "In 2007, UNICEF estimated that there were 380,000 orphans in Haiti, but de Bono said Thursday that he'd \"hate to vouch for that figure\" because that number -- any 
number -- is impossible to verify.", + "length": 197 + }, + { + "text": "The United Nations Children's Fund, or UNICEF, does not facilitate adoptions, but it has been bombarded with calls from people who want a Haitian orphan, said Christopher de Bono, a UNICEF spokesman.", + "length": 199 + }, + { + "text": "Aid groups said adoptions that were already in progress before the January 12 earthquake should go ahead, as long as the right legal documents are in place and they meet Haitian and international law.", + "length": 200 + }, + { + "text": "Between Haiti's \"lousy [child welfare] oversight system,\" and all the challenges that Haitians have endured, it's not uncommon for Haitian parents to put their children in orphanages temporarily, de Bono said.", + "length": 209 + }, + { + "text": "The International Committee of the Red Cross has opened an office at the headquarters of the Haitian Red Cross in Crois de Prez to help people locate their relatives, said Pete Garratt, a disaster response manager at the British Red Cross.", + "length": 239 + }, + { + "text": "The disaster in Haiti has led to an outpouring of support around the world, with the United States alone donating more than $305 million as of Wednesday, according to the Chronicle of Philanthropy, a newspaper covering nonprofit organizations.", + "length": 243 + }, + { + "text": "\" \"Taking children out of the country would permanently separate thousands of children from their families -- a separation that would compound the acute trauma they are already suffering and inflict long-term damage on their chances of recovery,\" Whitbread said.", + "length": 262 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.9138842821121216 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:11.865411741Z", + "first_section_created": "2025-12-23T09:34:11.866793597Z", + "last_section_published": "2025-12-23T09:34:11.867041807Z", + "all_results_received": 
"2025-12-23T09:34:11.931761723Z", + "output_generated": "2025-12-23T09:34:11.931952831Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:11.866793597Z", + "publish_time": "2025-12-23T09:34:11.867041807Z", + "first_worker_start": "2025-12-23T09:34:11.86762603Z", + "last_worker_end": "2025-12-23T09:34:11.930853Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:11.867648131Z", + "start_time": "2025-12-23T09:34:11.867758536Z", + "end_time": "2025-12-23T09:34:11.867840239Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:11.86788Z", + "start_time": "2025-12-23T09:34:11.86804Z", + "end_time": "2025-12-23T09:34:11.930853Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:11.867543127Z", + "start_time": "2025-12-23T09:34:11.86762603Z", + "end_time": "2025-12-23T09:34:11.867723334Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:11.867562128Z", + "start_time": "2025-12-23T09:34:11.867659732Z", + "end_time": "2025-12-23T09:34:11.867721434Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + 
"avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4357, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/003d23a256ce34d73e05968a04727c0ed4a2a456.json b/data/output/003d23a256ce34d73e05968a04727c0ed4a2a456.json new file mode 100644 index 0000000..501c999 --- /dev/null +++ b/data/output/003d23a256ce34d73e05968a04727c0ed4a2a456.json @@ -0,0 +1,326 @@ +{ + "file_name": "003d23a256ce34d73e05968a04727c0ed4a2a456.txt", + "total_words": 737, + "top_n_words": [ + { + "word": "the", + "count": 41 + }, + { + "word": "and", + "count": 23 + }, + { + "word": "a", + "count": 22 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "airport", + "count": 17 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "s", + "count": 14 + }, + { + "word": "to", + "count": 14 + }, + { + "word": "hotel", + "count": 11 + }, + { + "word": "hotels", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "K.", + "length": 2 + }, + { + "text": "Element Miami .", + "length": 15 + }, + { + "text": "Planning a getaway?", + "length": 19 + }, + { + "text": "All rights reserved.", + "length": 20 + }, + { + "text": "Hilton Frankfurt Airport .", + "length": 26 + }, + { + "text": "Custom Hotel, Los Angeles .", + "length": 27 + }, + { + "text": "ALT Hotel Pearson, Toronto .", + "length": 28 + }, + { + "text": "Hilton Heathrow 
Terminal 5, U.", + "length": 30 + }, + { + "text": "Atlanta Airport Marriott Gateway .", + "length": 34 + }, + { + "text": "Steigenberger Airport Hotel Berlin .", + "length": 36 + }, + { + "text": "Aloft San Francisco International Airport .", + "length": 43 + }, + { + "text": "Lotte City Hotel Gimpo Airport, South Korea .", + "length": 45 + }, + { + "text": "Travel + Leisure: America's safest airports .", + "length": 45 + }, + { + "text": "Travel + Leisure: Innovative new airport terminals .", + "length": 52 + }, + { + "text": "Travel + Leisure: America's best and worst Airports .", + "length": 53 + }, + { + "text": "Copyright 2012 American Express Publishing Corporation.", + "length": 55 + }, + { + "text": "Don't miss Travel + Leisure's guide to the World's Best Hotels .", + "length": 64 + }, + { + "text": "The Hilton Frankfurt Airport is a stylish, hyper-connected oasis.", + "length": 65 + }, + { + "text": "It opened in late 2011, with touch screen controls in the 197 rooms.", + "length": 68 + }, + { + "text": "Moreover, they are being retooled for a new breed of business traveler.", + "length": 71 + }, + { + "text": "Todiwala's Kitchen), this property has all the makings of a hotel hot spot.", + "length": 75 + }, + { + "text": "Yet checking into the Hilton Frankfurt Airport, which opened in December turned out to be much more.", + "length": 100 + }, + { + "text": "Some of the best and most spectacular airport hotels are in Asia: the Regal in Hong Kong; the Crowne Plaza in Singapore.", + "length": 120 + }, + { + "text": "When Squaire managing director Christoph Nebl characterizes it as \"the best-connected spot in Europe,\" he's not exaggerating.", + "length": 125 + }, + { + "text": "The improved hotels are one component of a backlash against that shiny one-world placelessness that airports have long cultivated.", + "length": 130 + }, + { + "text": "\"The nature of work is changing,\" says Erin Hoover, head of design for the Sheraton and Westin brands, \"and 
it's very collaborative.", + "length": 132 + }, + { + "text": "Two minutes from the terminal via SkyTrain, the building is LEED certified and has a lobby floor made of terrazzo embedded with glass.", + "length": 134 + }, + { + "text": "Understated and refined, this hotel provides a welcome break from its chaotic surroundings—a massive theme-park-mall complex within the airport.", + "length": 146 + }, + { + "text": "The fully equipped kitchens, nutritious menus, and bathrooms with mood-improving lighting attest to Element's health-conscious hospitality approach.", + "length": 148 + }, + { + "text": "Sheraton Malpensa Hotel (Milan) A series of glass modules lined up like the teeth of a comb, this property makes for a fitting addition to a world capital of design.", + "length": 165 + }, + { + "text": "From its glamorous all-white main lobby staircase and unusually glitzy light fixtures to perfectly manicured exterior grounds and a celebrity chef--helmed restaurant (Mr.", + "length": 170 + }, + { + "text": "It's an example of the emerging generation of airport hotels that are intended to function as destinations, real places where one might reasonably stay longer than a single night.", + "length": 179 + }, + { + "text": "And there's more going on than that: the increasing sophistication of these hotels parallels a reemergence of civilization—daring architecture; edible food—in airports themselves.", + "length": 183 + }, + { + "text": "The Miami International Airport satellite of this Westin brand features the cutting-edge Pilot program, where electricity can be generated by guests using the hotel's stationary bikes.", + "length": 184 + }, + { + "text": "Now the rest of the world is catching up, and the newest airport hotels in Europe, the United States, Latin America, and elsewhere are responding to the generalized craving for experience.", + "length": 188 + }, + { + "text": "A newly rehabbed Clarion Inn building—dropped ceilings have been removed to give the rooms at 
this hotel an airy feel, and an expanded lobby big enough for a bustling bar scene has been added.", + "length": 194 + }, + { + "text": "(CNN) -- Airport hotels have always been necessary but unloved stopover spots for the depleted traveler, places to shower, rehydrate and let the body recuperate from the merciless rigors of flying.", + "length": 197 + }, + { + "text": "Original art, Egyptian cotton linens, an Italian-made Calla chair, and Fruits \u0026 Passion bath products lend sophisticated global flair to the 153-room ALT, part of Canadian hotel group Groupe Germain.", + "length": 199 + }, + { + "text": "\" Now airport hotels—like the newly opened Hilton in London, Novotel in Auckland, New Zealand, and Element in Miami—are catching up, bringing technology, design, and style to the international stopover.", + "length": 206 + }, + { + "text": "When Berlin's long-awaited Brandenburg Airport opens in March 2013, so too will this grand 322-room property with an outdoor reflecting pool, nine meeting spaces, a lobby bistro, and a fitness center with a gym, sauna, and steam bath.", + "length": 234 + }, + { + "text": "Relaunched and refreshed by Joie de Vivre in September 2011, this bombastic crash pad minutes from LAX appeals to your sense of whimsy with themed gimmicks, like the Pan Am--inspired staff uniforms and Hangar Lounge, the property's main lobby.", + "length": 243 + }, + { + "text": "The hotel, along with the lower-priced Hilton Garden Inn, occupies the eastern end of the Squaire (a name meant to evoke town square and air), an ultra-elongated mixed-use complex that rests on angled columns atop a high-speed rail station, is adjacent to the airport's commuter train station, and is squeezed between two major autobahns.", + "length": 338 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4147857427597046 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:12.367841451Z", + "first_section_created": 
"2025-12-23T09:34:12.368250568Z", + "last_section_published": "2025-12-23T09:34:12.368524279Z", + "all_results_received": "2025-12-23T09:34:12.426737632Z", + "output_generated": "2025-12-23T09:34:12.42693914Z", + "total_processing_time_ms": 59, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:12.368250568Z", + "publish_time": "2025-12-23T09:34:12.368524279Z", + "first_worker_start": "2025-12-23T09:34:12.369092502Z", + "last_worker_end": "2025-12-23T09:34:12.425803Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:12.369098802Z", + "start_time": "2025-12-23T09:34:12.369194206Z", + "end_time": "2025-12-23T09:34:12.36928941Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:12.369293Z", + "start_time": "2025-12-23T09:34:12.369448Z", + "end_time": "2025-12-23T09:34:12.425803Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:12.369058701Z", + "start_time": "2025-12-23T09:34:12.369135104Z", + "end_time": "2025-12-23T09:34:12.369243308Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:12.369006799Z", + "start_time": "2025-12-23T09:34:12.369092502Z", + "end_time": "2025-12-23T09:34:12.369131804Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + 
"worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4627, + "slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/003d28d0a36ba8c2cda72ce06394ee4c34437d17.json b/data/output/003d28d0a36ba8c2cda72ce06394ee4c34437d17.json new file mode 100644 index 0000000..f05b584 --- /dev/null +++ b/data/output/003d28d0a36ba8c2cda72ce06394ee4c34437d17.json @@ -0,0 +1,266 @@ +{ + "file_name": "003d28d0a36ba8c2cda72ce06394ee4c34437d17.txt", + "total_words": 464, + "top_n_words": [ + { + "word": "the", + "count": 26 + }, + { + "word": "a", + "count": 23 + }, + { + "word": "and", + "count": 16 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "school", + "count": 8 + }, + { + "word": "teacher", + "count": 8 + }, + { + "word": "ceremony", + "count": 7 + }, + { + "word": "in", + "count": 7 + }, + { + "word": "was", + "count": 7 + }, + { + "word": "for", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Ted Thornhill .", + "length": 15 + }, + { + "text": "box of sex toys.", + "length": 16 + }, + { + "text": "hers just happened to be public’.", + "length": 35 + }, + { + "text": "The attorney says \"f*** the children!", + "length": 37 + }, + { + 
"text": "\"' The parent, who walked out mid-way .", + "length": 39 + }, + { + "text": "' However, one former pupil defended her, .", + "length": 43 + }, + { + "text": "Much of the evening was a great celebration of their work.", + "length": 58 + }, + { + "text": "\" and the priest says \"oooh… Do we have time for that???", + "length": 58 + }, + { + "text": "through, added that the ceremony also contained awards for ‘horniest .", + "length": 72 + }, + { + "text": "‘The plane was going down and the teacher says we have to save the children.", + "length": 78 + }, + { + "text": "saying she had ‘a fun, outgoing personality’ and ‘people make mistakes, .", + "length": 79 + }, + { + "text": "stud’ and ‘horniest girl’, for which the prize was what appeared to be a .", + "length": 80 + }, + { + "text": "Controversy: The ceremony at Bellingham High School (pictured) caused outrage amongst parents .", + "length": 95 + }, + { + "text": "‘However, as a teacher and the club's leader, I take full responsibility and am extremely sorry.", + "length": 98 + }, + { + "text": "The backlash against the teacher, Teri Grimes, was so fierce that she was forced to issue a formal apology.", + "length": 107 + }, + { + "text": "‘This is not representative of our students who take such great pride in their school and respect one another.", + "length": 112 + }, + { + "text": "Bellingham High School Principal Jeff Vaughn wrote a message on the school website in which he described the controversy as a learning curve.", + "length": 141 + }, + { + "text": "One parent who witnessed the event at Bellingham High School in Whatcom County, Washington State, described how it left her opened mouthed in shock.", + "length": 148 + }, + { + "text": "'This has generated media coverage and has been a learning opportunity for our teens regarding the harm that can come from offensive comments and jokes.", + "length": 152 + }, + { + "text": "The upset mother vented her anger about Sunday's ceremony in the school 
hall, which she attended with her 17-year-old daughter, in an email to KOMO News.", + "length": 153 + }, + { + "text": "She said: ‘I sat there with my mouth open in shock and the final straw was when a joke was told on stage about a teacher, a lawyer and a priest on a plane.", + "length": 157 + }, + { + "text": "Apology: The teacher who organized the event, Teri Grimes, has said she is 'extremely sorry' about its content, which included a pedophile joke and profanity .", + "length": 159 + }, + { + "text": "It said: 'We met with our drama club advisor and students to discuss the very inappropriate behavior by some members of the club that occurred during their awards ceremony on Sunday evening.", + "length": 190 + }, + { + "text": "High School parents were left outraged after watching a drama teacher’s awards ceremony, which contained swearing, a box of sex toys being presented as a prize and a joke about a paedophile priest.", + "length": 199 + }, + { + "text": "Under fire: High School parents were left outraged after watching an awards ceremony by drama teacher Teri Grimes (pictured), which contained swearing and a box of sex toys being presented as a prize .", + "length": 201 + }, + { + "text": "A statement from Grimes, who has been a teacher for 30 years, said: ‘I deeply apologize for some very inappropriate comments and actions made during our drama students' end-of-the-year awards ceremony.", + "length": 203 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7011974453926086 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:12.869358725Z", + "first_section_created": "2025-12-23T09:34:12.871095395Z", + "last_section_published": "2025-12-23T09:34:12.871269902Z", + "all_results_received": "2025-12-23T09:34:12.934770069Z", + "output_generated": "2025-12-23T09:34:12.934954277Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 
0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:12.871095395Z", + "publish_time": "2025-12-23T09:34:12.871269902Z", + "first_worker_start": "2025-12-23T09:34:12.871914628Z", + "last_worker_end": "2025-12-23T09:34:12.933885Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:12.871906328Z", + "start_time": "2025-12-23T09:34:12.871975531Z", + "end_time": "2025-12-23T09:34:12.872037233Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:12.872066Z", + "start_time": "2025-12-23T09:34:12.8722Z", + "end_time": "2025-12-23T09:34:12.933885Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:12.871852126Z", + "start_time": "2025-12-23T09:34:12.871937129Z", + "end_time": "2025-12-23T09:34:12.872016533Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:12.871844326Z", + "start_time": "2025-12-23T09:34:12.871914628Z", + "end_time": "2025-12-23T09:34:12.87194303Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2753, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/003d61820f25422f9f18160da2dd66ae54be59d0.json b/data/output/003d61820f25422f9f18160da2dd66ae54be59d0.json new file mode 100644 index 0000000..fbb273a --- /dev/null +++ b/data/output/003d61820f25422f9f18160da2dd66ae54be59d0.json @@ -0,0 +1,222 @@ +{ + "file_name": "003d61820f25422f9f18160da2dd66ae54be59d0.txt", + "total_words": 344, + "top_n_words": [ + { + "word": "to", + "count": 15 + }, + { + "word": "the", + "count": 12 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "ibrahimovic", + "count": 9 + }, + { + "word": "a", + "count": 7 + }, + { + "word": "with", + "count": 6 + }, + { + "word": "their", + "count": 5 + }, + { + "word": "cup", + "count": 4 + }, + { + "word": "for", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "John Drayton .", + "length": 14 + }, + { + "text": "VIDEO Scroll down to watch The best of Zlatan .", + "length": 47 + }, + { + "text": "Put it there: Ibrahimovic and Cristiano Ronaldo shake hands before the play-off game .", + "length": 86 + }, + { + "text": "Missing out: Ibrahimovic and Sweden missed out on a place at the World Cup when they lost to Portugal .", + "length": 103 + }, + { + "text": "Star in France: Ibrahimovic helped PSG win the French title and will be joined next season by David Luiz .", + "length": 106 + }, + { + "text": "Fans make their point: French 
supporters hold a banner referring to Ibrahimovic before their game with Honduras .", + "length": 113 + }, + { + "text": "Summer break: Sweden striker Ibrahimovic couldn't fire his country to the World Cup but he is now attending as a fan .", + "length": 118 + }, + { + "text": "Look who's here: Paris Saint-Germain striker Zlatan Ibrahimovic poses with World Cup mascot Fuleco after landing in Rio .", + "length": 121 + }, + { + "text": "The Paris Saint-Germain forward was pictured arriving in Rio and posing with the tournament's official mascot Fuleco the Armadillo.", + "length": 131 + }, + { + "text": "But the former Barcelona forward was powerless to stop Cristiano Ronaldo netting a hat-trick to seal a 4-2 aggregate win for Paulo Bento's side.", + "length": 144 + }, + { + "text": "Ibrahimovic did everything in his power to fire Sweden to the finals, scoring twice in the second leg of their play-off with Portugal in Stockholm.", + "length": 147 + }, + { + "text": "Sweden may have lost to Portugal in their World Cup play-off in November, but that hasn't stopped star striker Zlatan Ibrahimovic travelling to Brazil.", + "length": 151 + }, + { + "text": "Marco Verratti and Salvatore Sirigu are looking to progress from Group D with Italy, while Ezequiel Lavezzi is battling the likes of Lionel Messi and Sergio Aguero for a place in Argentina's starting XI.", + "length": 203 + }, + { + "text": "It is not yet known whether Ibrahimovic has travelled to South America as a fan or for commercial reasons, although he will have no shortage of team-mates to watch if he decides to take in some live action.", + "length": 206 + }, + { + "text": "Thiago Silva and new signing David Luiz are both plying their trade for home favourites Brazil, Edinson Cavani will be looking to shoot down England with Uruguay on Thursday and French duo Yohan Cabaye and Blaise Matuidi starred in Les Bleus' opening 3-0 win against Honduras.", + "length": 276 + } + ], + "sentiment": { + "sentiment": 
"neutral", + "score": 0.4471557140350342 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:13.371835738Z", + "first_section_created": "2025-12-23T09:34:13.373457403Z", + "last_section_published": "2025-12-23T09:34:13.373640611Z", + "all_results_received": "2025-12-23T09:34:13.435533013Z", + "output_generated": "2025-12-23T09:34:13.435700519Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:13.373457403Z", + "publish_time": "2025-12-23T09:34:13.373640611Z", + "first_worker_start": "2025-12-23T09:34:13.374144931Z", + "last_worker_end": "2025-12-23T09:34:13.434589Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:13.374176632Z", + "start_time": "2025-12-23T09:34:13.374241535Z", + "end_time": "2025-12-23T09:34:13.374304937Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:13.374428Z", + "start_time": "2025-12-23T09:34:13.374573Z", + "end_time": "2025-12-23T09:34:13.434589Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:13.374085329Z", + "start_time": "2025-12-23T09:34:13.374144931Z", + "end_time": "2025-12-23T09:34:13.374223134Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:13.37411823Z", + "start_time": "2025-12-23T09:34:13.374177032Z", + "end_time": "2025-12-23T09:34:13.374195733Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1985, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/003d773c00279077fc83986a725be77c3ef0740f.json b/data/output/003d773c00279077fc83986a725be77c3ef0740f.json new file mode 100644 index 0000000..76956fa --- /dev/null +++ b/data/output/003d773c00279077fc83986a725be77c3ef0740f.json @@ -0,0 +1,432 @@ +{ + "file_name": "003d773c00279077fc83986a725be77c3ef0740f.txt", + "total_words": 939, + "top_n_words": [ + { + "word": "the", + "count": 61 + }, + { + "word": "to", + "count": 27 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "of", + "count": 22 + }, + { + "word": "a", + "count": 21 + }, + { + "word": "is", + "count": 17 + }, + { + "word": "they", + "count": 16 + }, + { + "word": "it", + "count": 15 + }, + { + "word": "said", + "count": 14 + }, + { + "word": "in", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "...", + "length": 3 + }, + { + "text": "Stolen.", + "length": 7 + }, + { + "text": "They were stuck.", + 
"length": 16 + }, + { + "text": "One expert says no.", + "length": 19 + }, + { + "text": "The treasure is gone.", + "length": 21 + }, + { + "text": "They were white elephants.", + "length": 26 + }, + { + "text": "leading the investigation.", + "length": 26 + }, + { + "text": "The video is incredibly clear.", + "length": 30 + }, + { + "text": "\"It doesn't make sense to do it.", + "length": 32 + }, + { + "text": "\"They kept them in their closets.", + "length": 33 + }, + { + "text": "The team found the Atocha in 1985.", + "length": 34 + }, + { + "text": "You can't touch an Egyptian mummy.", + "length": 34 + }, + { + "text": "They made no money out of the deals.", + "length": 36 + }, + { + "text": "they can't show it, they can't enjoy it.", + "length": 40 + }, + { + "text": "This is a hands-on connection to history.", + "length": 41 + }, + { + "text": "85-ounce bar, valued at more than $550,000.", + "length": 43 + }, + { + "text": "\"They don't buy stolen property, because ...", + "length": 44 + }, + { + "text": "\" It was different because visitors could touch it.", + "length": 51 + }, + { + "text": "\" \"Ordinarily people don't get to touch something like that.", + "length": 60 + }, + { + "text": "Today, its case is broken, littered with black fingerprint dust.", + "length": 64 + }, + { + "text": "The items range from Colombian artifacts to Rembrandt paintings.", + "length": 64 + }, + { + "text": "First, they appeared to be targeting a display case of gold chains.", + "length": 67 + }, + { + "text": "Two thieves were caught in the act by the museum's security cameras.", + "length": 68 + }, + { + "text": "\"We recovered paintings and artifacts that were missing for many years.", + "length": 71 + }, + { + "text": "\" But now, what does a thief do with a priceless, high-profile artifact?", + "length": 72 + }, + { + "text": "It's not just any glass, but three-eighths-inch thick bulletproof Lexan glass.", + "length": 78 + }, + { + "text": "He said thieves often steal 
the items and then try to figure how to sell them.", + "length": 78 + }, + { + "text": "Ten, 15, sometimes 20 years, because the thieves couldn't get rid of them,\" he said.", + "length": 84 + }, + { + "text": "\"We're getting information and following leads,\" said Key West Police Chief Donie Lee.", + "length": 86 + }, + { + "text": "None has been recovered, and federal agents are using DNA to try to find the perpetrators.", + "length": 90 + }, + { + "text": "The stolen bar is one of dozens of gold and silver bars retrieved from the bottom of the sea.", + "length": 93 + }, + { + "text": "The ships were headed home to Spain with a cargo of gold, silver and coins from the new world.", + "length": 94 + }, + { + "text": "Wittman said no legitimate collector would take the risks associated with buying stolen goods.", + "length": 94 + }, + { + "text": "\"By designating this as a handling object, it brought certain risks to the bar,\" Kendrick said.", + "length": 95 + }, + { + "text": "Both ships had gone down in a hurricane off Key West shortly after leaving Havana, Cuba, in 1622.", + "length": 97 + }, + { + "text": "The FBI has recovered more than 2,600 items of cultural property valued at more than $142 million.", + "length": 98 + }, + { + "text": "By reaching into the specially designed display case, more than 6 million people have touched the 74.", + "length": 101 + }, + { + "text": "\" Wandering through the museum, the thieves can be seen in security video trying to open museum doors.", + "length": 102 + }, + { + "text": "Is there an underground market that will pay $550,000 for this almost 400-year-old piece of solid gold?", + "length": 103 + }, + { + "text": "Key West authorities said they believe the thieves were not locals and that they are probably long gone.", + "length": 104 + }, + { + "text": "The museum's insurance company is offering a $25,000 reward for information leading to the return of the bar.", + "length": 109 + }, + { + "text": "\" What makes the 
crime so shocking, police said, is that the thieves were able to snap the glass at its edges.", + "length": 110 + }, + { + "text": "\" Treasure hunter and salvor Mel Fisher recovered the solid gold bar from the wreck of the Santa Margarita in 1980.", + "length": 115 + }, + { + "text": "\"They're touching something that belonged to someone in 1622,\" said Carol Shaughnessy, author of \"Diving Into Glory.", + "length": 116 + }, + { + "text": "Police said they remain hopeful they will solve the crime but just hope they can recover this golden piece of history.", + "length": 118 + }, + { + "text": "\"This is a special piece,\" said Melissa Kendrick, executive director of the Mel Fisher Maritime Museum in Key West, Florida.", + "length": 124 + }, + { + "text": "Experts say that about 90 percent of stolen art and artifacts is eventually recovered that but it often takes years to find.", + "length": 124 + }, + { + "text": "Fisher and his team had been searching for the Nuestra Senora de Atocha and instead found the sister ship, the Santa Margarita.", + "length": 127 + }, + { + "text": "\"All the pieces have an incredible historic value, but this is the piece that was shared with the public in a whole totally different way.", + "length": 138 + }, + { + "text": "\"But after your first five, and your next 10, and when you get to 25 years, you start to get to the point when you think that it's never going to happen.", + "length": 153 + }, + { + "text": "\" In 1990, thieves entered the Isabella Stewart Gardner Museum in Boston, Massachusetts, and stole 13 works of art, including three Rembrandts from the 1600s.", + "length": 158 + }, + { + "text": "\"That's why these crimes don't make a whole lot of money for the criminals,\" said Robert Wittman, a former FBI agent who once headed the FBI's Art Crime Team.", + "length": 158 + }, + { + "text": "\"Unfortunately we haven't got the best lead, which is, I know that person and we go out, and it's a positive ID, and we're able to go 
out and pick those guys up.", + "length": 161 + }, + { + "text": "Key West, Florida (CNN) -- For more than 20 years, the bulletproof museum case housed a small piece of yesteryear: a gold bar recovered from a sunken Spanish galleon.", + "length": 166 + }, + { + "text": "\"Other than melting it down, which is the worst-case scenario for everyone, we're just hoping that they will come to their senses somehow and return this back to the museum.", + "length": 173 + }, + { + "text": "Wittman, the former FBI agent and author of \"Priceless: How I Went Undercover to Rescue the World's Stolen Treasures,\" said the market is incredibly small for these high-profile objects.", + "length": 186 + }, + { + "text": "Then, after a security guard left this part of the museum, a man can be seen reaching into the case housing the gold bar and placing the little piece of history into his pocket before exiting the museum.", + "length": 203 + }, + { + "text": "It makes them into criminals, and the last thing they want to do is spend a lot of money for a painting or for an artifact, whether it's gold or whatever, and have it seized by the police and go to jail,\" he said.", + "length": 213 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6780620217323303 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:13.874395453Z", + "first_section_created": "2025-12-23T09:34:13.874764068Z", + "last_section_published": "2025-12-23T09:34:13.875254588Z", + "all_results_received": "2025-12-23T09:34:13.956826886Z", + "output_generated": "2025-12-23T09:34:13.957014593Z", + "total_processing_time_ms": 82, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 81, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:13.874764068Z", + "publish_time": "2025-12-23T09:34:13.875068181Z", + "first_worker_start": "2025-12-23T09:34:13.875607102Z", + "last_worker_end": 
"2025-12-23T09:34:13.955909Z", + "total_journey_time_ms": 81, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:13.875657204Z", + "start_time": "2025-12-23T09:34:13.875741608Z", + "end_time": "2025-12-23T09:34:13.875838812Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:13.875874Z", + "start_time": "2025-12-23T09:34:13.876031Z", + "end_time": "2025-12-23T09:34:13.955909Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 79 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:13.875656504Z", + "start_time": "2025-12-23T09:34:13.875729407Z", + "end_time": "2025-12-23T09:34:13.875849912Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:13.875516699Z", + "start_time": "2025-12-23T09:34:13.875607102Z", + "end_time": "2025-12-23T09:34:13.875674205Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:13.875150284Z", + "publish_time": "2025-12-23T09:34:13.875254588Z", + "first_worker_start": "2025-12-23T09:34:13.875691806Z", + "last_worker_end": "2025-12-23T09:34:13.917484Z", + "total_journey_time_ms": 42, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:13.875707306Z", + "start_time": "2025-12-23T09:34:13.875738708Z", + "end_time": "2025-12-23T09:34:13.875745608Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:13.875987Z", + "start_time": "2025-12-23T09:34:13.876168Z", + "end_time": "2025-12-23T09:34:13.917484Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 41 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:13.87603222Z", + 
"start_time": "2025-12-23T09:34:13.87629163Z", + "end_time": "2025-12-23T09:34:13.876301531Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:13.875643404Z", + "start_time": "2025-12-23T09:34:13.875691806Z", + "end_time": "2025-12-23T09:34:13.875696806Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 120, + "min_processing_ms": 41, + "max_processing_ms": 79, + "avg_processing_ms": 60, + "median_processing_ms": 79, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2598, + "slowest_section_id": 0, + "slowest_section_time_ms": 81 + } +} diff --git a/data/output/003e026b81c3e0c0e1c05472b16160d4fb62fa29.json b/data/output/003e026b81c3e0c0e1c05472b16160d4fb62fa29.json new file mode 100644 index 0000000..62062c8 --- /dev/null +++ b/data/output/003e026b81c3e0c0e1c05472b16160d4fb62fa29.json @@ -0,0 +1,660 @@ +{ + "file_name": "003e026b81c3e0c0e1c05472b16160d4fb62fa29.txt", + "total_words": 1689, + "top_n_words": [ + 
{ + "word": "the", + "count": 78 + }, + { + "word": "i", + "count": 54 + }, + { + "word": "to", + "count": 50 + }, + { + "word": "he", + "count": 38 + }, + { + "word": "in", + "count": 37 + }, + { + "word": "a", + "count": 32 + }, + { + "word": "mccoy", + "count": 30 + }, + { + "word": "s", + "count": 28 + }, + { + "word": "of", + "count": 25 + }, + { + "word": "it", + "count": 24 + } + ], + "sorted_sentences": [ + { + "text": "Now there is more.", + "length": 18 + }, + { + "text": "Cup Leopardstown .", + "length": 18 + }, + { + "text": "But that’s not me.", + "length": 20 + }, + { + "text": "But here’s the rub.", + "length": 21 + }, + { + "text": "I am an all-in person.", + "length": 22 + }, + { + "text": "‘No adrenaline rush.", + "length": 22 + }, + { + "text": "‘That’s just not me.", + "length": 24 + }, + { + "text": "That’s how he sees it.", + "length": 24 + }, + { + "text": "‘No thrill,’ he said.", + "length": 25 + }, + { + "text": "Those are not the reasons.", + "length": 26 + }, + { + "text": "And Norbert Dentressangle.", + "length": 26 + }, + { + "text": "That’s just the way I am.", + "length": 27 + }, + { + "text": "That is the toughest thing.", + "length": 27 + }, + { + "text": "He knows that and so do we.", + "length": 27 + }, + { + "text": "All his nonchalance was gone.", + "length": 29 + }, + { + "text": "I do think I could perform OK.", + "length": 30 + }, + { + "text": "‘With no problem,’ he said.", + "length": 31 + }, + { + "text": "McCoy has never been nonchalant.", + "length": 32 + }, + { + "text": "He has been far from nonchalant.", + "length": 32 + }, + { + "text": "I am a very stubborn human being.", + "length": 33 + }, + { + "text": "The best part of his life is over.", + "length": 34 + }, + { + "text": "That’s what happens to sportsmen.", + "length": 35 + }, + { + "text": "Evander Holyfield is not far behind.", + "length": 36 + }, + { + "text": "Like having his doctor on speed dial.", + "length": 37 + }, + { + "text": "Except, of course, 
McCoy’s teetotal.", + "length": 38 + }, + { + "text": "It’s downhill at a canter from here.", + "length": 38 + }, + { + "text": "Some things are worth standing up for.", + "length": 38 + }, + { + "text": "’ This is not an unfamiliar scenario.", + "length": 39 + }, + { + "text": "‘I still really, really enjoy riding.", + "length": 39 + }, + { + "text": "It’s the only thing I ever want to do.", + "length": 40 + }, + { + "text": "Muhammad Ali is the most obvious example.", + "length": 41 + }, + { + "text": "‘It’s the only thing I have ever done.", + "length": 42 + }, + { + "text": "It feels like McCoy is in mourning already.", + "length": 43 + }, + { + "text": "Next year, he wants to go to the US Masters.", + "length": 44 + }, + { + "text": "There was no chance of him changing his mind.", + "length": 45 + }, + { + "text": "‘That isn’t going to happen,’ said McCoy.", + "length": 47 + }, + { + "text": "‘I often think I am not sure why I’m retiring.", + "length": 50 + }, + { + "text": "He has lived with pain through much of his career.", + "length": 50 + }, + { + "text": "Because I think, mentally and physically, I could.", + "length": 50 + }, + { + "text": "He was deluged with abuse but he did the right thing.", + "length": 53 + }, + { + "text": "‘Thank God I didn’t get to the Emirates,’ he said.", + "length": 56 + }, + { + "text": "They fear the loss of the rage, the thrill of the fight.", + "length": 56 + }, + { + "text": "There are going to be a lot of things missing in my life.", + "length": 57 + }, + { + "text": "The Hangover Part IV may be in the pipeline at this rate.", + "length": 57 + }, + { + "text": "But there is something apt about McCoy doing it this way.", + "length": 57 + }, + { + "text": "In fact, he’s planning two trips to Vegas in two months.", + "length": 58 + }, + { + "text": "When you take the money, there is sometimes a price to pay.", + "length": 59 + }, + { + "text": "Europe’s clubs pay the price for opening Pandora’s box .", + "length": 60 + }, 
+ { + "text": "But that isn’t making the spectre of retirement any easier.", + "length": 61 + }, + { + "text": "Like clocking up more miles on the motorway than Eddie Stobart.", + "length": 63 + }, + { + "text": "It was as if he suddenly realised what it had all meant to him.", + "length": 63 + }, + { + "text": "‘I was actually racing in Bangor but I saw some of it on the TV.", + "length": 66 + }, + { + "text": "‘The hardest thing is that I’m still enjoying it,’ said McCoy.", + "length": 68 + }, + { + "text": "I was thinking, “I’m not the only one who should be retiring”.", + "length": 68 + }, + { + "text": "‘I wish I was Peter Pan and I could play for ever,’ said Fowler.", + "length": 68 + }, + { + "text": "He’d like to play a bit more golf and watch a bit more of Arsenal.", + "length": 68 + }, + { + "text": "’ I have always admired Kevin Kilbane and now I admire him even more.", + "length": 71 + }, + { + "text": "‘I’m not retiring because I don’t think I could compete any more.", + "length": 71 + }, + { + "text": "He said he could ride another 700 winners, thus reaching 5,000 in total.", + "length": 72 + }, + { + "text": "The press release had pointed out it would be AP’s ‘last’ Cheltenham.", + "length": 75 + }, + { + "text": "’ There is, though, one possible hitch emerging in his post-racing plans.", + "length": 75 + }, + { + "text": "He’s thinking of heading over to Mayweather-Pacquiao in Las Vegas on May 2.", + "length": 77 + }, + { + "text": "McCoy celebrates with JP Mcmanus (left) and Princess Ann following his triumph .", + "length": 80 + }, + { + "text": "‘I always had it in my mind that this is the way I wanted it to be,’ he said.", + "length": 81 + }, + { + "text": "His lament for a part of his life that is soon to be lost sounds achingly familiar.", + "length": 83 + }, + { + "text": "I don’t mean this in an arrogant way but I don’t just want to be a normal jockey.", + "length": 85 + }, + { + "text": "AP McCoy is treated by paramedics after falling from 
Synchronised at Becher's Brook .", + "length": 85 + }, + { + "text": "McCoy will partner Grand National favourite Shutthefrontdoor at Aintree on April 11 .", + "length": 85 + }, + { + "text": "McCoy celebrates wining the Cheltenham Gold up in 2012 on Synchronised at Cheltenham .", + "length": 86 + }, + { + "text": "The last time he’ll ride at Towcester, or Sedgefield, or Market Rasen, or Worcester.", + "length": 86 + }, + { + "text": "His career was nearing its end but Fowler was eking everything he could from its embers.", + "length": 88 + }, + { + "text": "But when McCoy talks about it, he makes it sound like a long fall from light into darkness.", + "length": 91 + }, + { + "text": "It happens even to those who appeared to dance through their careers with great nonchalance.", + "length": 92 + }, + { + "text": "‘However I do this year, it’s going to be better than next year,’ McCoy said, deadpan.", + "length": 92 + }, + { + "text": "The last time he’ll beat Richard Johnson into second place in the Jockeys’ Championship.", + "length": 92 + }, + { + "text": "McCoy fell again at Becher's Brook in the 2012 Grand National off of his horse Synchronised .", + "length": 93 + }, + { + "text": "Mr Mole gave soon to retire 19-time champion McCoy his 200th winner of the season at Newbury .", + "length": 94 + }, + { + "text": "When the lights go down, sport’s greatest players often find real life a difficult anticlimax.", + "length": 96 + }, + { + "text": "Sepp Blatter leaves the International Football Assocation Board meeting in Belfast on Saturday .", + "length": 96 + }, + { + "text": "Tony McCoy riding to win The Agetur UK Novices' Limited Handicap Chase at Newbury on Friday .", + "length": 96 + }, + { + "text": "McCoy and JP McManus celebrates as Carlingford Lough wins the Galway Plate at Galway Racecourse .", + "length": 97 + }, + { + "text": "Like going out for a meal in the evening and gulping down a spoonful of guilt with every mouthful.", + "length": 98 + }, + { + 
"text": "Like being on first-name terms with hospital staff in pretty much every racing town in the country.", + "length": 99 + }, + { + "text": "McCoy is all smiles as he lifts the Cheltenham Gold cup after riding Synchronised to glory in 2012 .", + "length": 100 + }, + { + "text": "Kevin Kilbane complained to the FA about West Ham fans chanting about Tottenham striker Harry Kane .", + "length": 100 + }, + { + "text": "Retiring from a sport that demands so much of him physically and mentally might sound like a release.", + "length": 101 + }, + { + "text": "McCoy speaks about his retirement during a press conference prior to the Cheltenham Festival meeting .", + "length": 102 + }, + { + "text": "He is confident he would have been crowned Champion Jockey for the 21st season in succession next year.", + "length": 103 + }, + { + "text": "Like other Arsenal fans, he watched the team’s capitulation to Monaco on Wednesday night with dismay.", + "length": 103 + }, + { + "text": "I don’t think I’m any worse than I was or that I’m frightened of the younger lads coming through.", + "length": 103 + }, + { + "text": "He mentioned Sir Alex Ferguson and Joe Calzaghe as men who went out at the top, men he wants to emulate.", + "length": 104 + }, + { + "text": "McCoy on his way to winning the Cheltenham Gold Cup Steeplechase race at Cheltenham Racecourse in 2012 .", + "length": 104 + }, + { + "text": "’ McCoy falls from Butler's Cabin as they come over Becher's Brook during the Grand National in 2008 .", + "length": 105 + }, + { + "text": "So, sure, when it is all over, the greatest jump jockey there has ever been may indulge himself a little.", + "length": 105 + }, + { + "text": "’ McCoy will be hoping Carlingford Lough (pictured) will give him a winner in the Gold Cup at Cheltenham .", + "length": 108 + }, + { + "text": "‘What I would love to be able to do is retire tomorrow and come back as someone else and carry on as normal.", + "length": 110 + }, + { + "text": "McCoy, who 
will retire this season, poses with trainer Jonjo O'Neill at Cheltenham racecourse in October 2013 .", + "length": 111 + }, + { + "text": "’ McCoy is already thinking of what might have been had he not taken the decision to dismount one final time.", + "length": 111 + }, + { + "text": "There are some things that AP McCoy will not miss when he brings his astonishing racing career to an end this spring.", + "length": 117 + }, + { + "text": "I think every morning when I wake up “I don’t know why I’m retiring”, but I know it’s the right thing to do.", + "length": 118 + }, + { + "text": "That’s the way it is for the man who has dominated his sport for 20 years: every day for McCoy is a ‘last’ something now.", + "length": 127 + }, + { + "text": "He took a bit of joshing in Lambourn from the compere, Luke Harvey, about how he might end his last Cheltenham without a winner.", + "length": 128 + }, + { + "text": "‘I won’t ever be able to enjoy anything like I have been able to enjoy riding,’ the 40-year-old said in Lambourn on Friday.", + "length": 129 + }, + { + "text": "‘So gimme a stage, where this bull here can rage,’ Robert de Niro’s Jake LaMotta says as he reads his doggerel in Raging Bull.", + "length": 132 + }, + { + "text": "It happens to fighters most often, although maybe that is just because their reluctance to leave the stage can be so desperately damaging.", + "length": 138 + }, + { + "text": "I often think that if I was to carry on and ride three or four days a week and ride in the big races I could carry on riding for another five years.", + "length": 148 + }, + { + "text": "The last time he’ll ride up the hill to the finish at Cheltenham, the last chances he’ll get to add to the record 30 winners he has ridden there.", + "length": 149 + }, + { + "text": "‘I’m not saying that I wanted to suffer but I wanted to retire when I was still enjoying it and it was still going to be a very difficult decision.", + "length": 151 + }, + { + "text": "There, in the little 
racing town nestled amid the rolling beauty of the Berkshire Downs, McCoy gave a press conference to talk about the approach of the Cheltenham Festival.", + "length": 173 + }, + { + "text": "McCoy admitted: 'I won’t ever be able to enjoy anything like I have been able to enjoy riding' McCoy on his way to winning The Physicool 'National Hunt' Novices' Hurdle Race at Newbury on Friday .", + "length": 198 + }, + { + "text": "Kilbane has a 10-year-old daughter, Elsie, who has Down’s Syndrome, and last week he complained to the FA about West Ham fans singing a song that compared Spurs striker Harry Kane with a ‘****’.", + "length": 200 + }, + { + "text": "McCoy tucks into a dish which has been specially created by food scientist Dr Rachel Edwards-Stuart, who spent three years creating innovative flavour and texture experiences for TV chef Heston Blumenthal .", + "length": 206 + }, + { + "text": "I spoke to Robbie Fowler, one of football’s most iconoclastic characters, a few years ago when he had just come off the pitch in Brisbane after he had played in front of a sparse crowd at an A-League game.", + "length": 207 + }, + { + "text": "In the middle of the disquiet about the serpentine writhing that led to Fifa’s confirmation that the 2022 World Cup in Qatar would take place in our winter, it might serve us well to pull our punches just a little.", + "length": 216 + }, + { + "text": "Nothing excuses the wretched machinations of Sepp Blatter and FIFA but when leading European clubs welcomed massive investment from Qatar and elsewhere in the Gulf, we legitimised those nations as major players in world football.", + "length": 230 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4772554785013199 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:14.376363945Z", + "first_section_created": "2025-12-23T09:34:14.377803004Z", + "last_section_published": "2025-12-23T09:34:14.378257022Z", + "all_results_received": 
"2025-12-23T09:34:14.462660734Z", + "output_generated": "2025-12-23T09:34:14.462932745Z", + "total_processing_time_ms": 86, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 84, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:14.377803004Z", + "publish_time": "2025-12-23T09:34:14.378073915Z", + "first_worker_start": "2025-12-23T09:34:14.378592635Z", + "last_worker_end": "2025-12-23T09:34:14.454237Z", + "total_journey_time_ms": 76, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:14.378620737Z", + "start_time": "2025-12-23T09:34:14.378736041Z", + "end_time": "2025-12-23T09:34:14.378853946Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:14.378939Z", + "start_time": "2025-12-23T09:34:14.379101Z", + "end_time": "2025-12-23T09:34:14.454237Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 75 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:14.378572835Z", + "start_time": "2025-12-23T09:34:14.378677939Z", + "end_time": "2025-12-23T09:34:14.378805444Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:14.378496232Z", + "start_time": "2025-12-23T09:34:14.378592635Z", + "end_time": "2025-12-23T09:34:14.378645138Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:14.378111916Z", + "publish_time": "2025-12-23T09:34:14.378257022Z", + "first_worker_start": "2025-12-23T09:34:14.378782343Z", + "last_worker_end": "2025-12-23T09:34:14.461727Z", + "total_journey_time_ms": 83, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:14.378752742Z", + "start_time": 
"2025-12-23T09:34:14.378797344Z", + "end_time": "2025-12-23T09:34:14.378869847Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:14.379206Z", + "start_time": "2025-12-23T09:34:14.379341Z", + "end_time": "2025-12-23T09:34:14.461727Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 82 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:14.378767643Z", + "start_time": "2025-12-23T09:34:14.378814844Z", + "end_time": "2025-12-23T09:34:14.378903448Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:14.378742642Z", + "start_time": "2025-12-23T09:34:14.378782343Z", + "end_time": "2025-12-23T09:34:14.378816545Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 157, + "min_processing_ms": 75, + "max_processing_ms": 82, + "avg_processing_ms": 78, + "median_processing_ms": 82, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, 
+ "average_section_size": 4602, + "slowest_section_id": 1, + "slowest_section_time_ms": 83 + } +} diff --git a/data/output/003e14dc2a71d72efd9f47af3962d9352b151c84.json b/data/output/003e14dc2a71d72efd9f47af3962d9352b151c84.json new file mode 100644 index 0000000..c503f6f --- /dev/null +++ b/data/output/003e14dc2a71d72efd9f47af3962d9352b151c84.json @@ -0,0 +1,290 @@ +{ + "file_name": "003e14dc2a71d72efd9f47af3962d9352b151c84.txt", + "total_words": 536, + "top_n_words": [ + { + "word": "the", + "count": 40 + }, + { + "word": "of", + "count": 23 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "to", + "count": 11 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "carbon", + "count": 8 + }, + { + "word": "that", + "count": 8 + }, + { + "word": "as", + "count": 7 + }, + { + "word": "cats", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Mark Prigg .", + "length": 12 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "The rest were plant-eaters.", + "length": 27 + }, + { + "text": "19:00 EST, 6 November 2012 .", + "length": 28 + }, + { + "text": "12:27 EST, 7 November 2012 .", + "length": 28 + }, + { + "text": "They isolated the carbon from the tooth enamel.", + "length": 47 + }, + { + "text": "Sabre-toothed tigers feasted on prehistoric horses, researchers believe.", + "length": 72 + }, + { + "text": "The signature travels through the food chain and can be found in carnivores as well.", + "length": 84 + }, + { + "text": "'The smaller cat probably incorporated additional prey species from more dense woodland.", + "length": 88 + }, + { + "text": "' He said a study of the bear dog found it 'implied prey acquisition from open woodland .", + "length": 89 + }, + { + "text": "'Analysis indicated that hipparionine horses were moderately to very feasible prey of 
these cats.", + "length": 97 + }, + { + "text": "An isotope is a version of an element that contains a different number of neutrons in its nucleus.", + "length": 98 + }, + { + "text": "Carbon 12 and 13 are both present in the carbon dioxide that plants take in during photosynthesis.", + "length": 98 + }, + { + "text": "When an herbivore eats a plant, that plant leaves an isotopic signature in the animal's bones and teeth.", + "length": 104 + }, + { + "text": "The team calculated what kind of creatures lived in the area nine million years ago from dental records.", + "length": 104 + }, + { + "text": "'Diet preferences indicate it was the only predator commonly consuming astroportax, that occupied more open woodland.", + "length": 117 + }, + { + "text": "This illustration depicts how the region of Cerro de los Batallones in central Spain likely looked 9 million years ago.", + "length": 119 + }, + { + "text": "Different plants make use of the isotopes in different ways, and so they retain different amounts of them in their fibers.", + "length": 122 + }, + { + "text": "Little has been known about how the sabre toothed tiger, shown here in a California museum, co-existed with other species .", + "length": 123 + }, + { + "text": "' To arrive at their findings, the researchers conducted what's called a stable carbon isotope analysis on the animals' teeth.", + "length": 126 + }, + { + "text": "Using a dentist's drill with a diamond bit, they sampled teeth from 69 specimens, including 27 saber-toothed cats and bear dogs.", + "length": 128 + }, + { + "text": "Researchers used carbon to shed light on how saber-toothed cats and bear dogs shared space and prey during the late Miocene period .", + "length": 132 + }, + { + "text": "The diets of ancient apex predators is difficult to discern, because as they were at the top of the food chain there were fewer of them.", + "length": 136 + }, + { + "text": "'On the basis of their different body sizes it is likely that these two cats 
avoided competition for prey by hunting herbivores of different size.", + "length": 146 + }, + { + "text": "They then compared the carbon isotope of the three predators to those of the herbivores, allowing them to make a calculation as to what the beasts ate.", + "length": 151 + }, + { + "text": "Researchers studied two sabre-toothed cats - one the size of a modern leopard and the other the size of a modern tiger - as well as a prehistoric 'bear dog'.", + "length": 157 + }, + { + "text": "He said: 'The two sabre toothed cats consumed herbivores from more dense woodland, a habitat that would have allowed the smaller cat to hide from the larger one.", + "length": 161 + }, + { + "text": "Using a mass spectrometer, which you could think of as a type of scale, they measured the ratio of the more massive carbon 13 molecules to the less-massive carbon 12.", + "length": 166 + }, + { + "text": "Dr Soledad Domingo, of the University of Michigan, said that the sabre toothed cats ate ancient horses, while the bear dog ate astroportax, similar to the modern buffalo.", + "length": 170 + }, + { + "text": "But 'exceptional fossil sites' in Cerro de los Batallones, near Madrid, have yielded clues as to what three ancient carnivores fed upon, reports journal Proceedings of the Royal Society B.", + "length": 188 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5474167466163635 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:14.879053666Z", + "first_section_created": "2025-12-23T09:34:14.879304877Z", + "last_section_published": "2025-12-23T09:34:14.879496984Z", + "all_results_received": "2025-12-23T09:34:14.951430492Z", + "output_generated": "2025-12-23T09:34:14.951588699Z", + "total_processing_time_ms": 72, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 71, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:14.879304877Z", + "publish_time": 
"2025-12-23T09:34:14.879496984Z", + "first_worker_start": "2025-12-23T09:34:14.880070208Z", + "last_worker_end": "2025-12-23T09:34:14.950424Z", + "total_journey_time_ms": 71, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:14.880033506Z", + "start_time": "2025-12-23T09:34:14.880099109Z", + "end_time": "2025-12-23T09:34:14.880168612Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:14.880289Z", + "start_time": "2025-12-23T09:34:14.880424Z", + "end_time": "2025-12-23T09:34:14.950424Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 70 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:14.880046507Z", + "start_time": "2025-12-23T09:34:14.880099309Z", + "end_time": "2025-12-23T09:34:14.880164811Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:14.880008905Z", + "start_time": "2025-12-23T09:34:14.880070208Z", + "end_time": "2025-12-23T09:34:14.880094709Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 70, + "min_processing_ms": 70, + "max_processing_ms": 70, + "avg_processing_ms": 70, + "median_processing_ms": 70, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3176, + "slowest_section_id": 0, + "slowest_section_time_ms": 71 + } +} diff --git a/data/output/003e481a54210e79e05ea4365d1f6a11e583fad5.json b/data/output/003e481a54210e79e05ea4365d1f6a11e583fad5.json new file mode 100644 index 0000000..f08500a --- /dev/null +++ b/data/output/003e481a54210e79e05ea4365d1f6a11e583fad5.json @@ -0,0 +1,286 @@ +{ + "file_name": "003e481a54210e79e05ea4365d1f6a11e583fad5.txt", + "total_words": 653, + "top_n_words": [ + { + "word": "the", + "count": 29 + }, + { + "word": "on", + "count": 22 + }, + { + "word": "i", + "count": 20 + }, + { + "word": "she", + "count": 17 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "of", + "count": 13 + }, + { + "word": "her", + "count": 12 + }, + { + "word": "benefits", + "count": 11 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "and", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "I'm not f***ing getting off of benefits.", + "length": 40 + }, + { + "text": "If I wanted a Barbie, I'd f***ing get it.", + "length": 41 + }, + { + "text": "When my kids are in school I'll get a part time job.", + "length": 52 + }, + { + "text": "'From being young, I've had everything I ever wanted.", + "length": 53 + }, + { + "text": "That is just as simple as that,' she said on the show in 2014.", + "length": 62 + }, + { + "text": "But she doesn't end up attending because she can't get childcare.", + "length": 65 + }, + { + "text": "Dean said: 'Eventually I would like to get a job when I can find one.", + "length": 69 + }, + { + "text": "Benefits Britain: Life on the Dole, Monday night at 9pm 
on Channel 5 .", + "length": 70 + }, + { + "text": "I'm not taking the p*** I'm asking for this help because I don't work.", + "length": 70 + }, + { + "text": "Dean Rowley, 19, pictured,  has been on benefits since he left school three years ago .", + "length": 88 + }, + { + "text": "She says on the show (broadcast this evening): 'So many people take p*** out of the system.", + "length": 91 + }, + { + "text": "' The mother-of-two says she can't get a job as she wants to see her two children grow up .", + "length": 91 + }, + { + "text": "Stephanie Cocker pictured on this evening's Channel 5 show Benefits Britain: Life On The Dole .", + "length": 95 + }, + { + "text": "She added: 'I'd rather work nine to five and get less money but I want to watch my kids grow up too.", + "length": 100 + }, + { + "text": "I spend most time in my bedroom, watch TV, surfing the internet, listening to music, taking selfies.", + "length": 100 + }, + { + "text": "Tough times: Thousands people in Steph's hometown Sheffield are currently out of work and on benefits .", + "length": 103 + }, + { + "text": "Steph said she was 'happy as Larry' with her taxpayer-funded life when she was on the show in 2014, pictured .", + "length": 110 + }, + { + "text": "She remains out of work and on benefits, blaming it on the fact she has to look after her children aged two and four.", + "length": 117 + }, + { + "text": "Now a year on, the cameras have returned to Steph's council flat where she has defended herself against the criticism.", + "length": 118 + }, + { + "text": "Last year single mother-of-two Stephanie Cocker provoked outrage when she boasted about her 'cushty' life on benefits.", + "length": 118 + }, + { + "text": "Her hometown of Sheffield, is one of the most deprived parts of the UK, with thousands out of work and dependent on the dole.", + "length": 125 + }, + { + "text": "He lives at home with his mother so he spends the money by buying the things he needs like headphones and mobile phone 
cases.", + "length": 125 + }, + { + "text": "After her story appeared on MailOnline, furious taxpayers accused her of being a 'scrounger' and making them 'look like mugs'.", + "length": 126 + }, + { + "text": "Meanwhile in Essex, the show follows Dean Rowley, 19, who has been claiming £60 a week benefits since he left school three years ago.", + "length": 135 + }, + { + "text": "She explains: 'The college course I was going to got all f***ed up as I'm relying on my mum to watch the kids because it's not in nursery times.", + "length": 144 + }, + { + "text": "' During the show, Steph, who left school at 14, is seen enrolling for a college course to help further her chances of eventually getting a job.", + "length": 144 + }, + { + "text": "' The teenager admits he finds it hard to get out of bed and even phones his mother from his room when she's downstairs so he doesn't have to move.", + "length": 147 + }, + { + "text": "'I'm not going to work, come pick the kids up, go to my mum's, care for them and that's every day seven day I week, I can't do that, I'll have no life,' she said.", + "length": 162 + }, + { + "text": "Stephanie first appeared on the Channel 5 show with her friend Travis, also on benefits, pictured left and with her on the programme seen on the TV screen, right .", + "length": 163 + }, + { + "text": "The 25-year-old from Sheffield appeared on Channel 5 show Benefits Britain: Life On The Dole where she revealed her £300 a week handouts made her 'as happy as Larry'.", + "length": 167 + }, + { + "text": "However, he soon realises, 'it's not healthy for 19-year-old to sit in his bedroom all day doing nothing' when his benefits are cut off for five weeks because he fails to keep up his appointments at the job centre.", + "length": 214 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.44754284620285034 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:15.37981261Z", + "first_section_created": 
"2025-12-23T09:34:15.380153823Z", + "last_section_published": "2025-12-23T09:34:15.380350431Z", + "all_results_received": "2025-12-23T09:34:15.450237256Z", + "output_generated": "2025-12-23T09:34:15.450419164Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:15.380153823Z", + "publish_time": "2025-12-23T09:34:15.380350431Z", + "first_worker_start": "2025-12-23T09:34:15.381074961Z", + "last_worker_end": "2025-12-23T09:34:15.449263Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:15.38106396Z", + "start_time": "2025-12-23T09:34:15.381137563Z", + "end_time": "2025-12-23T09:34:15.381194765Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:15.381271Z", + "start_time": "2025-12-23T09:34:15.381415Z", + "end_time": "2025-12-23T09:34:15.449263Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:15.381115862Z", + "start_time": "2025-12-23T09:34:15.381188665Z", + "end_time": "2025-12-23T09:34:15.381275569Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:15.381010658Z", + "start_time": "2025-12-23T09:34:15.381074961Z", + "end_time": "2025-12-23T09:34:15.381106462Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + 
"worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3356, + "slowest_section_id": 0, + "slowest_section_time_ms": 69 + } +} diff --git a/data/output/003e707be68bede1fc49ac1bbfec0edebed52fe4.json b/data/output/003e707be68bede1fc49ac1bbfec0edebed52fe4.json new file mode 100644 index 0000000..54b4272 --- /dev/null +++ b/data/output/003e707be68bede1fc49ac1bbfec0edebed52fe4.json @@ -0,0 +1,588 @@ +{ + "file_name": "003e707be68bede1fc49ac1bbfec0edebed52fe4.txt", + "total_words": 1089, + "top_n_words": [ + { + "word": "the", + "count": 76 + }, + { + "word": "a", + "count": 46 + }, + { + "word": "of", + "count": 40 + }, + { + "word": "was", + "count": 22 + }, + { + "word": "in", + "count": 21 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "to", + "count": 18 + }, + { + "word": "richard", + "count": 17 + }, + { + "word": "is", + "count": 15 + }, + { + "word": "s", + "count": 15 + } + ], + "sorted_sentences": [ + { + "text": "A .", + "length": 3 + }, + { + "text": "UK'.", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "But .", + "length": 5 + }, + { + "text": 
"'It .", + "length": 5 + }, + { + "text": "2009.", + "length": 5 + }, + { + "text": "She .", + "length": 5 + }, + { + "text": "1800s.", + "length": 6 + }, + { + "text": "Miss .", + "length": 6 + }, + { + "text": "Miss .", + "length": 6 + }, + { + "text": "buried.", + "length": 7 + }, + { + "text": "' Miss .", + "length": 8 + }, + { + "text": "history.", + "length": 8 + }, + { + "text": "Richard .", + "length": 9 + }, + { + "text": "Because .", + "length": 9 + }, + { + "text": "Luckily, .", + "length": 10 + }, + { + "text": "Screenwriter .", + "length": 14 + }, + { + "text": "Plantagenet king .", + "length": 18 + }, + { + "text": "were those of Richard.", + "length": 22 + }, + { + "text": "was made last September.", + "length": 24 + }, + { + "text": "when they were installed.", + "length": 25 + }, + { + "text": "archaeologist revealed today.", + "length": 29 + }, + { + "text": "when the foundations were built.", + "length": 32 + }, + { + "text": "by builders developing the area.", + "length": 32 + }, + { + "text": "They say their analysis aims to .", + "length": 33 + }, + { + "text": "that of the last Plantagenet king.", + "length": 34 + }, + { + "text": "They found that, while there was no .", + "length": 37 + }, + { + "text": "his grave,' she told the Sunday Times.", + "length": 38 + }, + { + "text": "outhouse was built on top of the site.", + "length": 38 + }, + { + "text": "academics and will be screened tonight.", + "length": 39 + }, + { + "text": "manifested in control freak tendencies.", + "length": 39 + }, + { + "text": "and battle injuries suffered by the last .", + "length": 42 + }, + { + "text": "' In addition, the pair examined how his .", + "length": 42 + }, + { + "text": "Mr Buckley said that the king's remains were .", + "length": 46 + }, + { + "text": "interacted with people who he did not know well.", + "length": 48 + }, + { + "text": "Researchers now say Shakespeare's portrayal of him was unfair .", + "length": 63 + }, + { + "text": "The hunchback 
king: The skeleton or Richard III, unearthed in a .", + "length": 65 + }, + { + "text": "Kevin Spacey appears as Richard III in Old Vic Theatre in London.", + "length": 65 + }, + { + "text": "Philippa Langley, pictured right, said she felt a chill on a hot .", + "length": 66 + }, + { + "text": "the grave was only slightly disturbed by the building's primitive .", + "length": 67 + }, + { + "text": "resting place came close to being 'completely destroyed' when the .", + "length": 67 + }, + { + "text": "Langley was strolling across the car park used by Leicester social .", + "length": 68 + }, + { + "text": "527-year-old skeleton - buried with its hands crossed in a shallow .", + "length": 68 + }, + { + "text": "said the her play has been turned into a script for television and .", + "length": 68 + }, + { + "text": "dig last September, showing evidence of the curvature of the spine .", + "length": 68 + }, + { + "text": "team searching for the king's body excavated the area last September.", + "length": 69 + }, + { + "text": "remains of Richard III came within a matter of inches of being lost .", + "length": 69 + }, + { + "text": "almost flushed down the pan when the outhouse was constructed in the .", + "length": 70 + }, + { + "text": "grave - was missing its feet, which experts believe were smashed off .", + "length": 70 + }, + { + "text": "I walked past a particular spot and absolutely knew I was walking on .", + "length": 70 + }, + { + "text": "of the man who became one of the most controversial kings in English .", + "length": 70 + }, + { + "text": "three-week dig by experts from the University of Leicester unearthed .", + "length": 70 + }, + { + "text": "'We believe this is an interesting perspective on Richard's character.", + "length": 70 + }, + { + "text": "Langley, who is a member of the Richard III Society, is working on a .", + "length": 70 + }, + { + "text": "humanise Richard and 'to flesh out the bones and get to the character .", + "length": 71 + }, + { + 
"text": "may have had 'intolerance to uncertainty syndrome' – which may have .", + "length": 71 + }, + { + "text": "documentary charting the excavation for Channel 4 titled Richard III: .", + "length": 71 + }, + { + "text": "his remains were buried just 27in under the flag stones of Greyfriars .", + "length": 71 + }, + { + "text": "foundations, which missed the deceased monarch's body by a few inches .", + "length": 71 + }, + { + "text": "was a hot summer and I had goosebumps so badly and I was freezing cold.", + "length": 71 + }, + { + "text": "accounts of Richard, have both a curved spine back and wounded skull, .", + "length": 71 + }, + { + "text": "remarkable discovery of the remains, which, consistent with historical .", + "length": 72 + }, + { + "text": "The King in the Car Park, which has been made alongside the university .", + "length": 72 + }, + { + "text": "services while researching a play about the king when she felt a chill .", + "length": 72 + }, + { + "text": "the skeleton which was confirmed 'beyond reasonable doubt' today to be .", + "length": 72 + }, + { + "text": "forever when a 19th century toilet was built above the skeleton, a lead .", + "length": 73 + }, + { + "text": "Council car park because she was '99 per cent certain' that the remains .", + "length": 73 + }, + { + "text": "Langley initially funded the excavation of what is now a Leicester City .", + "length": 73 + }, + { + "text": "summer's day as she walked through the area where it was thought he was .", + "length": 73 + }, + { + "text": "Buckley, who led the project to exhume the deposed king, said his final .", + "length": 73 + }, + { + "text": "monastery in Leicester, they were disturbed on a number of occasions as .", + "length": 73 + }, + { + "text": "may have had an impact on his character - and specifically on the way he .", + "length": 74 + }, + { + "text": "film, which is now 'getting serious interest from Los Angeles and in the .", + "length": 74 + }, + { + "text": "site was 
later turned into a council car park for social workers until a .", + "length": 74 + }, + { + "text": "evidence for Shakespeare’s depiction of Richard III as a psychopath, he .", + "length": 75 + }, + { + "text": "disability, evident in the curvature of the spine of the King’s remains .", + "length": 75 + }, + { + "text": "In medieval times, deformation was often taken as a visible indication of a twisted soul.", + "length": 89 + }, + { + "text": "He said: 'The remains were very vulnerable because they were only under relatively modern debris.", + "length": 97 + }, + { + "text": "As a result, it is possible that this would have made him cautious in all his interactions with others.", + "length": 103 + }, + { + "text": "' One woman's hunch led to the discovery of the skeleton which has now been proven to be that of Richard III.", + "length": 109 + }, + { + "text": "A less experienced team could easily have damaged the skeleton whilst using a mechanical digger to open the trench.", + "length": 115 + }, + { + "text": "'I am a rational human being but the feeling I got was the same feeling I have had before when a truth is given to me.", + "length": 118 + }, + { + "text": "However, the academics speculate that Richard may have exhibited a common psychological syndrome know as an intolerance to uncertainty.", + "length": 135 + }, + { + "text": "Professor Mark Lansdale said: 'This syndrome is associated with a need to seek security following an insecure childhood, as Richard had.", + "length": 136 + }, + { + "text": "Researchers have claimed that Shakespeare's portrayal of King Richard III as a psychopath was unfounded - but admit he was a control freak.", + "length": 139 + }, + { + "text": "'This reputation, portrayed most famously in Shakespeare’s play, does not seem to have any basis in the facts we have about his life,' they said.", + "length": 147 + }, + { + "text": "Firstly, they examined one of the most persistent and critical depictions of Richard’s personality – 
the suggestion that he was a murdering psychopath.", + "length": 155 + }, + { + "text": "A team at the University of Leicester, which recently discovered the remains of the King under a car park, are now attempting to analyse the king's personality.", + "length": 160 + }, + { + "text": "' Professor Mark Lansdale, Head of the University’s School of Psychology, and forensic psychologist Dr Julian Boon carried out the study based on the consensus among historians.", + "length": 179 + }, + { + "text": "They found he showed little signs of the traits psychologists would use to identify psychopaths today – including narcissism, deviousness, callousness, recklessness and lack of empathy in close relationships.", + "length": 210 + }, + { + "text": "In varying degrees, it is associated with a number of positive aspects of personality including a strong sense of right and wrong, piety, loyalty to trusted colleagues, and a belief in legal processes - all exhibited by Richard.", + "length": 228 + }, + { + "text": "'On the negative side it is also associated with fatalism, a tendency to disproportionate responses when loyalty is betrayed and a general sense of 'control freakery' that can, in extreme cases, emerge as very authoritarian or possibly priggish.", + "length": 245 + }, + { + "text": "'However, noting that this is the problem historians work with as a matter of routine, we argue that a psychological approach provides a distinct and novel perspective: one which offers a different way of thinking about the human being behind the bones.", + "length": 253 + }, + { + "text": "Professor Lansdale added: 'Overall, we recognise the difficulty of drawing conclusions about people who lived 500 years ago and about whom relatively little is reliably recorded; especially when psychology is a science that is so reliant upon observation.", + "length": 255 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.521108478307724 + }, + "name_replaced": false, + "performance": { 
+ "file_detection_time": "2025-12-23T09:34:15.881116575Z", + "first_section_created": "2025-12-23T09:34:15.881388686Z", + "last_section_published": "2025-12-23T09:34:15.881796802Z", + "all_results_received": "2025-12-23T09:34:15.9881235Z", + "output_generated": "2025-12-23T09:34:15.988321808Z", + "total_processing_time_ms": 107, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 106, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:15.881388686Z", + "publish_time": "2025-12-23T09:34:15.881610895Z", + "first_worker_start": "2025-12-23T09:34:15.882274221Z", + "last_worker_end": "2025-12-23T09:34:15.977618Z", + "total_journey_time_ms": 96, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:15.882311323Z", + "start_time": "2025-12-23T09:34:15.882402527Z", + "end_time": "2025-12-23T09:34:15.88249083Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:15.882628Z", + "start_time": "2025-12-23T09:34:15.88281Z", + "end_time": "2025-12-23T09:34:15.977618Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 94 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:15.882311323Z", + "start_time": "2025-12-23T09:34:15.882371225Z", + "end_time": "2025-12-23T09:34:15.882463629Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:15.882214019Z", + "start_time": "2025-12-23T09:34:15.882274221Z", + "end_time": "2025-12-23T09:34:15.882313823Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:15.881675597Z", + "publish_time": "2025-12-23T09:34:15.881796802Z", + "first_worker_start": "2025-12-23T09:34:15.882399826Z", + "last_worker_end": "2025-12-23T09:34:15.987201Z", 
+ "total_journey_time_ms": 105, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:15.882460629Z", + "start_time": "2025-12-23T09:34:15.882527132Z", + "end_time": "2025-12-23T09:34:15.882579634Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:15.88274Z", + "start_time": "2025-12-23T09:34:15.882889Z", + "end_time": "2025-12-23T09:34:15.987201Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 104 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:15.882444628Z", + "start_time": "2025-12-23T09:34:15.88248863Z", + "end_time": "2025-12-23T09:34:15.882536432Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:15.882361925Z", + "start_time": "2025-12-23T09:34:15.882399826Z", + "end_time": "2025-12-23T09:34:15.882420927Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 198, + "min_processing_ms": 94, + "max_processing_ms": 104, + "avg_processing_ms": 99, + "median_processing_ms": 104, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3306, + "slowest_section_id": 1, + "slowest_section_time_ms": 105 + } +} diff --git a/data/output/003e8700a9a79c6161e63d4a3eeca93bea6061d0.json b/data/output/003e8700a9a79c6161e63d4a3eeca93bea6061d0.json new file mode 100644 index 0000000..d6e9fda --- /dev/null +++ b/data/output/003e8700a9a79c6161e63d4a3eeca93bea6061d0.json @@ -0,0 +1,250 @@ +{ + "file_name": "003e8700a9a79c6161e63d4a3eeca93bea6061d0.txt", + "total_words": 768, + "top_n_words": [ + { + "word": "the", + "count": 45 + }, + { + "word": "of", + "count": 30 + }, + { + "word": "to", + "count": 27 + }, + { + "word": "undercover", + "count": 23 + }, + { + "word": "and", + "count": 22 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "officers", + "count": 16 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "it", + "count": 13 + }, + { + "word": "was", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "'This is clearly inefficient and, at worst, could lead to avoidable mistakes being made.", + "length": 88 + }, + { + "text": "Mr Martin, a former British Army serviceman, became national lead for undercover policing in January 2013.", + "length": 106 + }, + { + "text": "Commander Martin remains a member of the Working Group but was replaced as chairman by Bedfordshire Police Assistant Chief Constable Jon Boutcher.", + "length": 146 + }, + { + "text": "He joined West Midlands Police in 1994 and moved to the Metropolitan Police in 2004 following a stint in the National Criminal Intelligence Service.", + "length": 148 + }, + { + "text": "This 'closed' attitude was behind the 'failure' of the police to adapt undercover policing response to online threats such as child sexual exploitation.", + "length": 152 + }, + { + "text": "The report was commissioned by Home Secretary 
Theresa May after it was revealed undercover officers had spied on the family of murdered teenager Stephen Lawrence.", + "length": 162 + }, + { + "text": "Elsewhere, a 'culture of secrecy amongst the undercover community' and a view that the 'undercover community has been nailed shut for years' were hindering progress.", + "length": 165 + }, + { + "text": "There are more than 1,200 police officers currently working undercover but only half of these are registered to the database intended to index them, a damning new report has revealed.", + "length": 184 + }, + { + "text": "'This is unacceptable, especially in the light of today's widely-held understanding of just how important sound oversight of this essential yet intrusive police tactic is,' the report added.", + "length": 190 + }, + { + "text": "Inspectors said in one instance they were initially denied access to covert premises housing an undercover unit and were told that 'chief officers were not allowed access to the same premises'.", + "length": 193 + }, + { + "text": "A series of scandals in which officers were found to have been sleeping with women they were spying on and adopting dead children's identities have damaged the public's faith in undercover policing .", + "length": 199 + }, + { + "text": "It found that 25 forces had a dedicated undercover online capability, 13 further forces worked with neighbouring forces on the issue and five forces did not have any undercover online capability at all.", + "length": 202 + }, + { + "text": "HMIC's report reveals for the first time that there are 1,229 undercover officers working in England and Wales and a total of 3,466 undercover operations were authorised between October 2009 and September 2013.", + "length": 211 + }, + { + "text": "Inspector Stephen Otter, who led the inspection, said: 'It was disappointing to find inconsistencies and shortcomings in the way undercover officers were supported by policies, systems and training across the country.", + 
"length": 217 + }, + { + "text": "Inspectors said it was 'disappointing' to find 'material weaknesses' in the leadership of undercover policing and urged chief constables to work together immediately to adopt a single set of standard operating procedures.", + "length": 221 + }, + { + "text": "HMIC said the professional standards body, the College of Policing, had to cancel three of the four courses which it had organised for authorising officers as the numbers of chief officers signing up to attend was so low.", + "length": 221 + }, + { + "text": "HMIC found the working group lacked 'effective and co-ordinated direction', was 'unclear about its role' and its chair and members were also unclear about how it was being held to account for the work which it was undertaking.", + "length": 226 + }, + { + "text": "' Inspectors raised concerns about the 'poor knowledge and lack of expertise' of senior leaders who authorise the use of undercover officers, although they found officers themselves to be 'knowledgeable, professional and courageous'.", + "length": 233 + }, + { + "text": "The Majesty's Inspectorate of Constabulary (HMIC) report shows that of the 1,229 officers working undercover, only 568 are registered to the national index - which the report said renders it completely 'unsuitable to the task for which it was created'.", + "length": 252 + }, + { + "text": "'Throughout our inspection, undercover officers were consistent in voicing their concerns about the ways in which forces required them to work differently from other forces and from what they understood from their training to be a nationally agreed way of working.", + "length": 264 + }, + { + "text": "It also concluded a shake-up of the National Undercover Working Group, which aims to raise standards in undercover policing, is now desperately needed in the wake of recent scandals where officers were found to be forming relationships with women they spied on and adopting dead children's identities.", + 
"length": 301 + }, + { + "text": "'It did not come as any surprise to us, therefore, to find that the quality of written authorities by assistant chief constables varied greatly, with too many not providing sufficient details to explain the necessity and proportionality of the decision to authorise the deployment of undercover officers,' the report said.", + "length": 322 + }, + { + "text": "The report was commissioned after it was revealed undercover officers had spied on the family of murdered teenager Stephen Lawrence (pictured) Elsewhere, the Inspectorate criticises the lack of psychological support in some forces and has called for a combined 10-year cap on length of tenure for foundation and advanced undercover officers after it found an example of one officer who had worked on undercover operations for more than 20 years.", + "length": 445 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4906477630138397 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:16.382745253Z", + "first_section_created": "2025-12-23T09:34:16.383112468Z", + "last_section_published": "2025-12-23T09:34:16.383333777Z", + "all_results_received": "2025-12-23T09:34:16.444798161Z", + "output_generated": "2025-12-23T09:34:16.444977169Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:16.383112468Z", + "publish_time": "2025-12-23T09:34:16.383333777Z", + "first_worker_start": "2025-12-23T09:34:16.3839117Z", + "last_worker_end": "2025-12-23T09:34:16.443865Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:16.383870598Z", + "start_time": "2025-12-23T09:34:16.383994203Z", + "end_time": "2025-12-23T09:34:16.384061806Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + 
"sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:16.384053Z", + "start_time": "2025-12-23T09:34:16.384189Z", + "end_time": "2025-12-23T09:34:16.443865Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:16.383834997Z", + "start_time": "2025-12-23T09:34:16.3839117Z", + "end_time": "2025-12-23T09:34:16.384014604Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:16.383844497Z", + "start_time": "2025-12-23T09:34:16.3839186Z", + "end_time": "2025-12-23T09:34:16.383959502Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4869, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git 
a/data/output/003ea9f105f5ddb31bc2a46b898dbdfa4d91d75f.json b/data/output/003ea9f105f5ddb31bc2a46b898dbdfa4d91d75f.json new file mode 100644 index 0000000..a9eae23 --- /dev/null +++ b/data/output/003ea9f105f5ddb31bc2a46b898dbdfa4d91d75f.json @@ -0,0 +1,254 @@ +{ + "file_name": "003ea9f105f5ddb31bc2a46b898dbdfa4d91d75f.txt", + "total_words": 382, + "top_n_words": [ + { + "word": "the", + "count": 22 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "it", + "count": 12 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "officers", + "count": 8 + }, + { + "word": "police", + "count": 8 + }, + { + "word": "an", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Amanda Williams .", + "length": 17 + }, + { + "text": "Scroll down to watch .", + "length": 22 + }, + { + "text": "11:54 EST, 14 March 2013 .", + "length": 26 + }, + { + "text": "14:21 EST, 14 March 2013 .", + "length": 26 + }, + { + "text": "One of the men said he was forced to use scissors to cut off his long beard.", + "length": 76 + }, + { + "text": "It is believed there were 15 officers in masks and they arrived in four cars.", + "length": 77 + }, + { + "text": "It is believed there were 15 officers in masks and they arrived in four cars.", + "length": 77 + }, + { + "text": "It is believed there has been an internal investigation launched by the Russian police.", + "length": 87 + }, + { + "text": "It is believed there has been an internal investigation launched by the Russian police .", + "length": 88 + }, + { + "text": "One of the men claims an officer held a lighter to his chin and told him 'cut it off or I will burn it off'.", + "length": 108 + }, + { + "text": "Police officers in Russia 
forced three men in a Muslim cafe to cut their beards at gunpoint, it has been claimed.", + "length": 113 + }, + { + "text": "Police officers in Russia forced three men in a Muslim cafe to cut their beards at gunpoint, it has been claimed .", + "length": 114 + }, + { + "text": "The incident took place on March 3 in the cafe located near the mosque as the men were eating after evening prayers .", + "length": 117 + }, + { + "text": "Afterwards it claims several people were taken to the police station, where the law enforcement officers checked their documents.", + "length": 129 + }, + { + "text": "He commanded all of his courtiers and officials to cut off their long beards and imposed an annual beard tax on those who refused.", + "length": 130 + }, + { + "text": "Russian news agencies have said the officers were implementing the rules of 'Peter the Great' who ruled the Tsardom of Russia in the late 1600s.", + "length": 144 + }, + { + "text": "According to Red Hot Russia, witnesses saw people in the uniform of riot police (OMON) burst into cafe place and force visitors onto their knees.", + "length": 145 + }, + { + "text": "During an inspection of a Muslim cafe in the Russian city of Surgut police officers apparently threatened some visitors with automatic weapons and with a lighter.", + "length": 162 + }, + { + "text": "During an inspection of a Muslim cafe in the Russian city of Surgut police officers threatened some visitors with automatic weapons and with a lighter, it has been said .", + "length": 170 + }, + { + "text": "One of the men (pictured) claims an officer held a lighter to his chin and told him 'cut it off or I will burn it off' The incident allegedly took place on March 3 in the cafe located near a mosque as the men were eating after evening prayers.", + "length": 243 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5828517079353333 + }, + "name_replaced": false, + "performance": { + "file_detection_time": 
"2025-12-23T09:34:16.88409642Z", + "first_section_created": "2025-12-23T09:34:16.884696544Z", + "last_section_published": "2025-12-23T09:34:16.884916653Z", + "all_results_received": "2025-12-23T09:34:16.952036666Z", + "output_generated": "2025-12-23T09:34:16.952185972Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:16.884696544Z", + "publish_time": "2025-12-23T09:34:16.884916653Z", + "first_worker_start": "2025-12-23T09:34:16.885447774Z", + "last_worker_end": "2025-12-23T09:34:16.950776Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:16.885484676Z", + "start_time": "2025-12-23T09:34:16.885532778Z", + "end_time": "2025-12-23T09:34:16.88557408Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:16.885627Z", + "start_time": "2025-12-23T09:34:16.885773Z", + "end_time": "2025-12-23T09:34:16.950776Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:16.885440874Z", + "start_time": "2025-12-23T09:34:16.885508677Z", + "end_time": "2025-12-23T09:34:16.885567779Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:16.885381572Z", + "start_time": "2025-12-23T09:34:16.885447774Z", + "end_time": "2025-12-23T09:34:16.885467475Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, 
+ "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2123, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/003eaac389a2450171f99f216b95f8741b58cbf0.json b/data/output/003eaac389a2450171f99f216b95f8741b58cbf0.json new file mode 100644 index 0000000..bce88eb --- /dev/null +++ b/data/output/003eaac389a2450171f99f216b95f8741b58cbf0.json @@ -0,0 +1,250 @@ +{ + "file_name": "003eaac389a2450171f99f216b95f8741b58cbf0.txt", + "total_words": 441, + "top_n_words": [ + { + "word": "the", + "count": 18 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "benschop", + "count": 12 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "that", + "count": 8 + }, + { + "word": "he", + "count": 7 + }, + { + "word": "is", + "count": 7 + }, + { + "word": "of", + "count": 7 + }, + { + "word": "was", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "\"This was an accident, but Mr.", + "length": 30 + }, + { + "text": "My client is a victim as well.", + "length": 30 + }, + { + "text": "and he is punished accordingly.", + "length": 31 + }, + { + "text": "\"The victims here aren't just those 
who died and their families.", + "length": 64 + }, + { + "text": "Benschop, who maintains his innocence, turned himself in Saturday.", + "length": 66 + }, + { + "text": "A Philadelphia judge refused to allow Sean Benschop, 42, to leave jail.", + "length": 71 + }, + { + "text": "\"And we believe that, in time, the facts will show that he is not responsible.", + "length": 78 + }, + { + "text": "Benschop was not responsible,\" Grey said, in remarks captured by CNN affiliate WPVI.", + "length": 84 + }, + { + "text": "Wednesday's building collapse in downtown Philadelphia left six dead and 13 people injured.", + "length": 91 + }, + { + "text": "He's currently being looked at as the cause of everybody's pain, but that just isn't the case.", + "length": 94 + }, + { + "text": "\"My client is being made the scapegoat in this situation,\" said Daine Grey, Benschop's attorney.", + "length": 96 + }, + { + "text": "(CNN) -- The crane operator facing charges over a deadly building collapse was denied bail Sunday.", + "length": 98 + }, + { + "text": "\" Benschop had marijuana and pain medication in his blood after the collapse, a law enforcement source told CNN.", + "length": 112 + }, + { + "text": "\" Grey told reporters Saturday that while his client feels \"extremely sympathetic and remorseful,\" he is not guilty.", + "length": 116 + }, + { + "text": "The first lawsuit against him was filed that same day, by attorneys for a 54-year-old woman pulled from the rubble by a firefighter.", + "length": 132 + }, + { + "text": "Pennsylvania court records indicate Benschop, who also went by the alias Kary Roberts, has been arrested multiple times in the past two decades.", + "length": 144 + }, + { + "text": "Afterward, searchers climbed over shards of wood, concrete and rebar looking for survivors, such as a 61-year-old woman pulled alive from the rubble early Thursday.", + "length": 164 + }, + { + "text": "\" \"Justice will only be served if Sean Benschop receives a sentence that 
buries him in a jailhouse forever, just like his victims were buried on Wednesday,\" Nutter said.", + "length": 169 + }, + { + "text": "Philadelphia Mayor Michael Nutter blamed Benschop's \"reckless and irresponsible behavior\" for the building collapse and said Saturday he hopes that Benschop faces \"the harshest level of charges ...", + "length": 197 + }, + { + "text": "Nutter is pressing for answers from two property owners who hired Benschop to operate heavy machinery, saying that, along with Benschop, they \"bear the ultimate and sole responsibility for this tragedy.", + "length": 202 + }, + { + "text": "Many of the related charges -- related to alleged firearms violations and theft -- were withdrawn, dismissed or resulted in not guilty verdicts, though he was found guilty in the mid-1990s on drug charges.", + "length": 205 + }, + { + "text": "He is charged with six counts of involuntary manslaughter, 13 counts of recklessly endangering another person, and one count of \"risking a catastrophe,\" District Attorney spokeswoman Tasha Jamerson told CNN.", + "length": 207 + }, + { + "text": "\" Benschop allegedly was working a crane to tear down a vacant building in downtown Philadelphia when a four-story wall collapsed onto a Salvation Army thrift store, causing an ominous rumble followed by panic on the streets.", + "length": 225 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.907372236251831 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:17.385924606Z", + "first_section_created": "2025-12-23T09:34:17.38627772Z", + "last_section_published": "2025-12-23T09:34:17.386457628Z", + "all_results_received": "2025-12-23T09:34:17.454955997Z", + "output_generated": "2025-12-23T09:34:17.455119903Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": 
"2025-12-23T09:34:17.38627772Z", + "publish_time": "2025-12-23T09:34:17.386457628Z", + "first_worker_start": "2025-12-23T09:34:17.388071493Z", + "last_worker_end": "2025-12-23T09:34:17.454081Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:17.388126795Z", + "start_time": "2025-12-23T09:34:17.388190498Z", + "end_time": "2025-12-23T09:34:17.3882382Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:17.388282Z", + "start_time": "2025-12-23T09:34:17.388423Z", + "end_time": "2025-12-23T09:34:17.454081Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:17.38800869Z", + "start_time": "2025-12-23T09:34:17.388071493Z", + "end_time": "2025-12-23T09:34:17.388139696Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:17.388038492Z", + "start_time": "2025-12-23T09:34:17.388087894Z", + "end_time": "2025-12-23T09:34:17.388113995Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + } + }, + "total_sections": 1, + "average_section_size": 2724, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/003efe1b59a1ab4d031347a1e7518e565cd587e2.json b/data/output/003efe1b59a1ab4d031347a1e7518e565cd587e2.json new file mode 100644 index 0000000..ecc2fda --- /dev/null +++ b/data/output/003efe1b59a1ab4d031347a1e7518e565cd587e2.json @@ -0,0 +1,366 @@ +{ + "file_name": "003efe1b59a1ab4d031347a1e7518e565cd587e2.txt", + "total_words": 771, + "top_n_words": [ + { + "word": "the", + "count": 52 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "a", + "count": 16 + }, + { + "word": "and", + "count": 16 + }, + { + "word": "shake", + "count": 13 + }, + { + "word": "t", + "count": 12 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "harlem", + "count": 11 + }, + { + "word": "he", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "'Ugh.", + "length": 5 + }, + { + "text": "Shake .", + "length": 7 + }, + { + "text": "Lawyers.", + "length": 8 + }, + { + "text": "AttenSHUN!", + "length": 10 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "David Mccormack .", + "length": 17 + }, + { + "text": ",’ he told them.", + "length": 18 + }, + { + "text": "Legal letters and s**t.", + "length": 23 + }, + { + "text": "Scroll down for videos .", + "length": 24 + }, + { + "text": "10:48 EST, 19 August 2013 .", + "length": 27 + }, + { + "text": "11:16 EST, 19 August 2013 .", + "length": 27 + }, + { 
+ "text": "wild and chaotic dance party.", + "length": 29 + }, + { + "text": "Is this the new Gagnam Style?", + "length": 29 + }, + { + "text": "VIDEO: T-Pain does the Harlem Shake .", + "length": 37 + }, + { + "text": "Here, models Cara Delevingne, Jourdan .", + "length": 39 + }, + { + "text": "So I found myself in that f***ing pickle.", + "length": 41 + }, + { + "text": "VIDEO: Speedo models do the Harlem Shake .", + "length": 42 + }, + { + "text": "In a new video craze, one person - in this .", + "length": 44 + }, + { + "text": "VIDEO: The Norwegian army does the Harlem Shake .", + "length": 49 + }, + { + "text": "VIDEO: Clark Retirement Community does the Harlem Shake .", + "length": 57 + }, + { + "text": "So exposure-wise it was fantastic, but everything else...", + "length": 57 + }, + { + "text": "Dunn and Rosie Tapner whip their hair to the catchy tune .", + "length": 58 + }, + { + "text": ": A group of Norwegian soldiers line up in a neat square formation .", + "length": 68 + }, + { + "text": "case Supermodel Jourdan Dunn - is seen dancing unnoticed to Harlem .", + "length": 68 + }, + { + "text": "I think it’s mostly because of all the legal s**t,' he told Pitchfork.", + "length": 72 + }, + { + "text": "Shake with style: When the bass line drops, everyone in the frame forms a .", + "length": 75 + }, + { + "text": "'I didn’t clear the samples because I was in my f***ing bedroom on Grand Street.", + "length": 82 + }, + { + "text": "At its peek, 4,000 'Harlem Shake' videos were being uploaded onto YouTube every day.", + "length": 84 + }, + { + "text": "Party time: Members of the community look bored, before breaking into the Harlem Shake .", + "length": 88 + }, + { + "text": "Gentle gyration: A Speedo model begins to boogie unnoticed by the rest of the cast and crew...", + "length": 94 + }, + { + "text": "Never too old: Residents of an American care home bravely attempt their own version of the dance .", + "length": 98 + }, + { + "text": "It also reached 
number three in the United Kingdom and number one in both Australia and New Zealand.", + "length": 100 + }, + { + "text": "Hula shake: In a video posted by T-Pain, the rapper starts by hula-hooping to the beat of the song .", + "length": 100 + }, + { + "text": "‘I’m meeting with my lawyer tomorrow for lunch, so I’m gonna find that out (why he hasn't been paid).", + "length": 107 + }, + { + "text": "Seconds later, the soldiers - who are suddenly wearing skis and standing on their heads - do the crazy dance .", + "length": 110 + }, + { + "text": "VIDEO: Cara Delevingne, Jourdan Sparks and Rosie Tapner do the Harlem Shake backstage at London Fashion Week .", + "length": 110 + }, + { + "text": "Dance party: Seconds later, the whole cast and crew join in, with the exception of the underwater camera man .", + "length": 110 + }, + { + "text": "But now Baauer – real name Harry Rodrigues – has spoken out about the downside of having such an unexpected smash.", + "length": 118 + }, + { + "text": "I wasn't going to think to call up [Delgado], I didn’t even know who it was who did that [sample]; I knew the Jayson Musson [sample].", + "length": 135 + }, + { + "text": "The viral success helped push download sales of the song, which charted at number one for five consecutive weeks on the US Billboard Hot 100.", + "length": 141 + }, + { + "text": "Now despite record label boss Diplo having previously said that an agreement had been reached to clear the samples, Baauer has said he is still waiting to be paid.", + "length": 163 + }, + { + "text": "Rhythm: When the beat comes in, a dancing crowd appears in the room, and the rapper - suddenly wearing a marching band outfit - busts a move in the centre of the group .", + "length": 169 + }, + { + "text": "Brooklyn-based DJ Baauer’s track ‘Harlem Shake’ achieved global recognition in February when it became the soundtrack to a series of memes posted on YouTube of people dancing wildly to the song.", + "length": 200 + }, + { + "text": 
"The 24-year-old DJ and producer created the track in his bedroom and since he wasn’t planning for such a runaway success he didn’t seek clearance from the two artists who he sampled without permission.", + "length": 205 + }, + { + "text": "Show me the money: Brooklyn-based DJ Baauer has revealed that he hasn't earned a single cent for his track 'Harlem Shake', which achieved global recognition in February when it became the soundtrack to a series of memes .", + "length": 221 + }, + { + "text": "The DJ behind this year’s biggest dance craze claims he hasn’t earned a single cent from the track because of legal issues arising from not having properly cleared samples with the original artists before it was released.", + "length": 225 + }, + { + "text": "While Baauer may not have received any money yet from downloads of his track, it certainly has helped to further his reputation and will undoubtedly have helped raise the fee he charges for appearing at clubs and music festivals.", + "length": 229 + }, + { + "text": "The ‘Harlem Shake’ quickly became an internet phenomenon inspiring a raft of other clips of people doing the crazy dance in any number of bizarre situations – in an old folks home, while carrying out Norwegian army drills, while filming a Speedo commercial.", + "length": 263 + }, + { + "text": "Even though the track had been around since May 2012, it was only after it had gained global success, that Baauer and his record label - Mad Decent - received lawyer's letters on behalf of the sampled artists - reggaetón artist Hector Delgado and Philadelphia rapper Jayson Musson.", + "length": 282 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5069165825843811 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:17.887251272Z", + "first_section_created": "2025-12-23T09:34:17.887651088Z", + "last_section_published": "2025-12-23T09:34:17.887913699Z", + "all_results_received": "2025-12-23T09:34:17.956394267Z", 
+ "output_generated": "2025-12-23T09:34:17.956585075Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:17.887651088Z", + "publish_time": "2025-12-23T09:34:17.887913699Z", + "first_worker_start": "2025-12-23T09:34:17.888551225Z", + "last_worker_end": "2025-12-23T09:34:17.955287Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:17.888472921Z", + "start_time": "2025-12-23T09:34:17.888551225Z", + "end_time": "2025-12-23T09:34:17.888661829Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:17.888715Z", + "start_time": "2025-12-23T09:34:17.88887Z", + "end_time": "2025-12-23T09:34:17.955287Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:17.888502123Z", + "start_time": "2025-12-23T09:34:17.888585626Z", + "end_time": "2025-12-23T09:34:17.88868103Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:17.888511923Z", + "start_time": "2025-12-23T09:34:17.888583826Z", + "end_time": "2025-12-23T09:34:17.888626828Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + 
"median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4443, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/003f45e32409fe8a7f8c7be474c827504d46a061.json b/data/output/003f45e32409fe8a7f8c7be474c827504d46a061.json new file mode 100644 index 0000000..64528e0 --- /dev/null +++ b/data/output/003f45e32409fe8a7f8c7be474c827504d46a061.json @@ -0,0 +1,338 @@ +{ + "file_name": "003f45e32409fe8a7f8c7be474c827504d46a061.txt", + "total_words": 562, + "top_n_words": [ + { + "word": "the", + "count": 35 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "his", + "count": 16 + }, + { + "word": "it", + "count": 14 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "mundell", + "count": 12 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "animal", + "count": 9 + }, + { + "word": "for", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Fierce?", + "length": 7 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Visit NBCNews.", + "length": 14 + }, + { + "text": "It scratched my back.", + "length": 21 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "I was bleeding like crazy.", + "length": 26 + }, + { + 
"text": "15:25 EST, 7 January 2013 .", + "length": 27 + }, + { + "text": "09:46 EST, 7 January 2013 .", + "length": 27 + }, + { + "text": "'It would have killed my wife.", + "length": 30 + }, + { + "text": "I know it would have,' he said.", + "length": 31 + }, + { + "text": "'You don’t test it, you do it.", + "length": 32 + }, + { + "text": "They were then treated for rabies .", + "length": 35 + }, + { + "text": "Test results are not yet available.", + "length": 35 + }, + { + "text": "'Bleeding like crazy': Roger Mundell Jr.", + "length": 40 + }, + { + "text": "Daily Mail Reporter and Associated Press .", + "length": 42 + }, + { + "text": "'I used everything I had to do it,' he said.", + "length": 44 + }, + { + "text": "Mundell's wife had run into the house for a gun.", + "length": 48 + }, + { + "text": "'I have bite marks in my eyelid, up my forehead.", + "length": 48 + }, + { + "text": "com for breaking news, world news, and news about the economy .", + "length": 63 + }, + { + "text": "As the couple held the cat down, Mundell shot the animal twice.", + "length": 63 + }, + { + "text": "Mundell, his nephew and his wife, are being treated for rabies.", + "length": 63 + }, + { + "text": "His wife wasn't bitten, but got the animal's blood on her hands.", + "length": 64 + }, + { + "text": "It is completely consistent with an animal that may have rabies.", + "length": 64 + }, + { + "text": "Mundell and his wife pinned the cat to the ground and shot it dead.", + "length": 67 + }, + { + "text": "' Bobcat numbers are growing nationwide and especially in Massachusetts.", + "length": 72 + }, + { + "text": "'It was on me in a split second,' Mundell told the Boston Globe on Sunday.", + "length": 74 + }, + { + "text": "But exact figures are difficult to calculate due to the animal's elusive nature.", + "length": 80 + }, + { + "text": "It then ran out of the garage and bit Mundell's 15-year-old nephew on the arms and back.", + "length": 88 + }, + { + "text": "Lucky escape: The 
man ran into the driveway where the animal bit his 15-year-old nephew .", + "length": 89 + }, + { + "text": "was scratched on his face and back after being attacked by a rabid bobcat in his own garage .", + "length": 93 + }, + { + "text": "Self-defense: Robert Mundell and his wife pinned the bobcat down and shot it twice in the head.", + "length": 95 + }, + { + "text": "Shock and awe: The victim claims that he only heard a hiss before being attacked in his garage .", + "length": 96 + }, + { + "text": "But as Mundell emerged into the drive to warn his nephew, the animal reappeared and bit the teenager.", + "length": 101 + }, + { + "text": "Bobcats grow to around twice the size of domestic house cats but are elusive animals and usually avoid human contact .", + "length": 118 + }, + { + "text": "' Mundell told the Boston Globe that he was glad that it was him rather than his wife or teenage nephew that was attacked.", + "length": 122 + }, + { + "text": "State Environmental Police took the bobcat to have it tested for rabies, which they think is likely given its unusual behavior.", + "length": 127 + }, + { + "text": "' While bobcats are a common sight in Massachusetts, the timid animals usually avoid humans and it is highly unusual that they attack people.", + "length": 141 + }, + { + "text": "Roger Mundell Jr, 53, went into the garage in Brookfield on Sunday morning to fetch some tie-down straps for a friend when the rabid animal attacked.", + "length": 149 + }, + { + "text": "After being taken by surprise by the bobcat, Mundell managed to throw his jacket - with the animal attached - over his head and make a run for the garage door.", + "length": 159 + }, + { + "text": "A man in Massachusetts says all he heard was a hiss before a bobcat pounced on him in his own garage, sinking its teeth into his face and its claws in his back.", + "length": 160 + }, + { + "text": "Tom French, of the state Division of Fisheries and Wildlife, said: 'This is completely out of character 
for a bobcat, even to be in the garage in the first place.", + "length": 162 + }, + { + "text": "The animal is approximately twice the size of a domestic house cat and can be easily identified by its short, 'bobbed' tail, prominent face ruff, and slightly tufted ears.", + "length": 171 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5369274616241455 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:18.388729144Z", + "first_section_created": "2025-12-23T09:34:18.389966694Z", + "last_section_published": "2025-12-23T09:34:18.390186303Z", + "all_results_received": "2025-12-23T09:34:18.462693534Z", + "output_generated": "2025-12-23T09:34:18.462862141Z", + "total_processing_time_ms": 74, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 72, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:18.389966694Z", + "publish_time": "2025-12-23T09:34:18.390186303Z", + "first_worker_start": "2025-12-23T09:34:18.390800928Z", + "last_worker_end": "2025-12-23T09:34:18.461865Z", + "total_journey_time_ms": 71, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:18.390733125Z", + "start_time": "2025-12-23T09:34:18.390817829Z", + "end_time": "2025-12-23T09:34:18.390899232Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:18.390944Z", + "start_time": "2025-12-23T09:34:18.391087Z", + "end_time": "2025-12-23T09:34:18.461865Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 70 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:18.390733725Z", + "start_time": "2025-12-23T09:34:18.390825829Z", + "end_time": "2025-12-23T09:34:18.390886531Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": 
"2025-12-23T09:34:18.390709324Z", + "start_time": "2025-12-23T09:34:18.390800928Z", + "end_time": "2025-12-23T09:34:18.390832429Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 70, + "min_processing_ms": 70, + "max_processing_ms": 70, + "avg_processing_ms": 70, + "median_processing_ms": 70, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3080, + "slowest_section_id": 0, + "slowest_section_time_ms": 71 + } +} diff --git a/data/output/003f8c8953025e086fa773c9b40d9b8cd6d9754c.json b/data/output/003f8c8953025e086fa773c9b40d9b8cd6d9754c.json new file mode 100644 index 0000000..8ad60bb --- /dev/null +++ b/data/output/003f8c8953025e086fa773c9b40d9b8cd6d9754c.json @@ -0,0 +1,408 @@ +{ + "file_name": "003f8c8953025e086fa773c9b40d9b8cd6d9754c.txt", + "total_words": 933, + "top_n_words": [ + { + "word": "the", + "count": 56 + }, + { + "word": "was", + "count": 27 + }, + { + "word": "of", + "count": 23 + }, + { + "word": "and", + "count": 22 + }, + { + "word": "school", + "count": 21 + }, + { + "word": "to", 
+ "count": 21 + }, + { + "word": "a", + "count": 20 + }, + { + "word": "that", + "count": 19 + }, + { + "word": "redding", + "count": 15 + }, + { + "word": "in", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "1 v.", + "length": 4 + }, + { + "text": "Redding (08-479).", + "length": 17 + }, + { + "text": "You know, I had a 4.", + "length": 20 + }, + { + "text": "No drugs were found.", + "length": 20 + }, + { + "text": "S Supreme Court in April.", + "length": 25 + }, + { + "text": "\" She is attending college.", + "length": 27 + }, + { + "text": "Savana Redding leaves the U.", + "length": 28 + }, + { + "text": "The girl denied the accusations.", + "length": 32 + }, + { + "text": "She was 13 when she was strip-searched.", + "length": 39 + }, + { + "text": "No medication was found, and she later sued.", + "length": 44 + }, + { + "text": "A search of Redding's backpack found nothing.", + "length": 45 + }, + { + "text": "\"That's a problem schools are trying to stem.", + "length": 45 + }, + { + "text": "The case is Safford Unified School District No.", + "length": 47 + }, + { + "text": "The Supreme Court found little agreement on key issues.", + "length": 55 + }, + { + "text": "\"Before it happened, I loved school, loved everything about it.", + "length": 63 + }, + { + "text": "\" Redding, now 19, said she has never gotten over her experience.", + "length": 65 + }, + { + "text": "\"I held my head down so that they could not see that I was about to cry.", + "length": 72 + }, + { + "text": "Adam Wolf, an ACLU attorney who represented Redding, applauded the decision.", + "length": 76 + }, + { + "text": "\"And, common sense is not a judicial monopoly or a constitutional imperative.", + "length": 77 + }, + { + "text": "But school administrators said the ruling does not make their jobs any easier.", + "length": 78 + }, + { + "text": "0 GPA, honor roll, and now, well, afterwards I never wanted to go to school again.", + "length": 82 + }, + { + "text": "A strip 
search was conducted by Wilson's assistant and a school nurse, both females.", + "length": 84 + }, + { + "text": "\"I'm pretty certain that it's so far less likely to happen again\" to other students.", + "length": 84 + }, + { + "text": "\" Whether the school district would be liable was not an issue before the high court.", + "length": 85 + }, + { + "text": "Savana Redding was 13 when administrators suspected that she was carrying banned drugs.", + "length": 87 + }, + { + "text": "That court said the school went too far in its effort to create a drug- and crime-free classroom.", + "length": 97 + }, + { + "text": "\"The strip search was the most humiliating experience I have ever had,\" Redding said in an affidavit.", + "length": 101 + }, + { + "text": "But when no contraband was found, the officials went too far by continuing the search of her underwear.", + "length": 103 + }, + { + "text": "\" In 1985, the high court allowed the search of a student's purse after she was suspected of hiding cigarettes.", + "length": 111 + }, + { + "text": "The justices concluded that the search was unreasonable but that individual school administrators could not be sued.", + "length": 116 + }, + { + "text": "Redding was pulled from class by Vice Principal Kerry Wilson, escorted to an office and confronted with the evidence.", + "length": 117 + }, + { + "text": "\" Souter said Wilson initially had \"sufficient suspicion\" to justify searching the girl's backpack and outer clothing.", + "length": 118 + }, + { + "text": "\"Preservation of order, discipline and safety in public schools is simply not the domain of the Constitution,\" he said.", + "length": 119 + }, + { + "text": "those medications for abusive purposes,\" said Francisco Negron, general counsel for the National School Boards Association.", + "length": 123 + }, + { + "text": "But Justice Clarence Thomas took the opposite view: that administrators deserved immunity and that the search was permissible.", + "length": 126 + 
}, + { + "text": "\"When parents send their kids to school, they can now breathe a sigh of relief they will not end up naked before school officials,\" Wolf said .", + "length": 143 + }, + { + "text": "Redding was ordered to strip to her underwear and to pull on the elastic of the underwear, so any hidden pills might fall out, according to court records.", + "length": 154 + }, + { + "text": "Redding was an eighth-grade honor student in 2003, with no history of disciplinary problems at Safford Middle School, about 127 miles from Tucson, Arizona.", + "length": 155 + }, + { + "text": "The school had a near-zero-tolerance policy for all prescription and over-the-counter medication, including the ibuprofen, without prior written permission.", + "length": 156 + }, + { + "text": "\" Opinions in 1995 and 2001 allowed schools to conduct random drug testing of high school athletes and those participating in other extracurricular activities.", + "length": 159 + }, + { + "text": "Such a search was permitted if there were \"reasonable\" grounds for believing that it would turn up evidence and when the search was not \"excessively intrusive.", + "length": 159 + }, + { + "text": "\" \"How they determine now whether the drug is dangerous, whether it's not dangerous -- that kind of clarity and that kind of guidance, the court did not give us.", + "length": 161 + }, + { + "text": "Justices John Paul Stevens and Ruth Bader Ginsburg agreed that the search was illegal but would have also made individual officials liable for damages by Redding.", + "length": 162 + }, + { + "text": "The court was being asked to clarify the extent of student rights involving searches and the discretion of officials regarding those they have responsibility over.", + "length": 163 + }, + { + "text": "During an investigation into pills found at the school, a student told the vice principal that Redding had given her prescription-strength 400-milligram ibuprofen pills.", + "length": 169 + }, + { + "text": 
"\"I'm pretty excited that they agreed with me, they see that it was wrong for the school to do that,\" Redding said from her Hobbs, New Mexico, home after the ruling was announced.", + "length": 178 + }, + { + "text": "But reflecting the divisiveness over the issue, Souter said, \"We think these differences of opinion from our own are substantial enough to require immunity for the school officials in this case.", + "length": 194 + }, + { + "text": "With the help of the American Civil Liberties Union, Redding and her family sued, and a federal appeals court in San Francisco ruled against the school, calling the search \"traumatizing\" and illegal.", + "length": 199 + }, + { + "text": "The larger issue of whether a campus setting traditionally gives schools greater authority over students suspected of illegal activity than police are allowed was not addressed fully by the divided court.", + "length": 204 + }, + { + "text": "\"Wilson's treatment of Redding was abusive, and it was not reasonable for him to believe that the law permitted it,\" said Ginsburg, who was especially forceful during oral arguments in April, criticizing the school's actions.", + "length": 225 + }, + { + "text": "\"Savana's subjective expectation of privacy against such a search is inherent in her account of it as embarrassing, frightening and humiliating,\" wrote Justice David Souter for the majority, likely his last opinion before he steps down from the bench next week.", + "length": 261 + }, + { + "text": "WASHINGTON (CNN) -- A former middle-school student who was strip-searched by school officials looking for ibuprofen pain medication won a partial victory of her Supreme Court appeal Thursday in a case testing the discretion of officials to ensure classroom safety.", + "length": 264 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5255370736122131 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:18.891044227Z", + 
"first_section_created": "2025-12-23T09:34:18.891350939Z", + "last_section_published": "2025-12-23T09:34:18.891755655Z", + "all_results_received": "2025-12-23T09:34:18.984738703Z", + "output_generated": "2025-12-23T09:34:18.984960712Z", + "total_processing_time_ms": 93, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 92, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:18.891350939Z", + "publish_time": "2025-12-23T09:34:18.891649751Z", + "first_worker_start": "2025-12-23T09:34:18.892188973Z", + "last_worker_end": "2025-12-23T09:34:18.961681Z", + "total_journey_time_ms": 70, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:18.892192773Z", + "start_time": "2025-12-23T09:34:18.892264476Z", + "end_time": "2025-12-23T09:34:18.89235738Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:18.892513Z", + "start_time": "2025-12-23T09:34:18.892646Z", + "end_time": "2025-12-23T09:34:18.961681Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 69 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:18.89212947Z", + "start_time": "2025-12-23T09:34:18.892250675Z", + "end_time": "2025-12-23T09:34:18.892355379Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:18.892106369Z", + "start_time": "2025-12-23T09:34:18.892188973Z", + "end_time": "2025-12-23T09:34:18.892235275Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:18.891694953Z", + "publish_time": "2025-12-23T09:34:18.891755655Z", + "first_worker_start": "2025-12-23T09:34:18.892180772Z", + "last_worker_end": "2025-12-23T09:34:18.983854Z", + "total_journey_time_ms": 92, + "worker_timings": { + 
"sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:18.892192773Z", + "start_time": "2025-12-23T09:34:18.892253175Z", + "end_time": "2025-12-23T09:34:18.892269876Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:18.892868Z", + "start_time": "2025-12-23T09:34:18.893013Z", + "end_time": "2025-12-23T09:34:18.983854Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 90 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:18.892213874Z", + "start_time": "2025-12-23T09:34:18.892242475Z", + "end_time": "2025-12-23T09:34:18.892265176Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:18.892133471Z", + "start_time": "2025-12-23T09:34:18.892180772Z", + "end_time": "2025-12-23T09:34:18.892189773Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 159, + "min_processing_ms": 69, + "max_processing_ms": 90, + "avg_processing_ms": 79, + "median_processing_ms": 90, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2830, + "slowest_section_id": 1, + "slowest_section_time_ms": 92 + } +} diff --git a/data/output/00402d448b1de131717eea88d9c1d63a126e083a.json b/data/output/00402d448b1de131717eea88d9c1d63a126e083a.json new file mode 100644 index 0000000..9968f0d --- /dev/null +++ b/data/output/00402d448b1de131717eea88d9c1d63a126e083a.json @@ -0,0 +1,226 @@ +{ + "file_name": "00402d448b1de131717eea88d9c1d63a126e083a.txt", + "total_words": 348, + "top_n_words": [ + { + "word": "the", + "count": 26 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "jazzy", + "count": 7 + }, + { + "word": "ditch", + "count": 6 + }, + { + "word": "dog", + "count": 6 + }, + { + "word": "in", + "count": 6 + }, + { + "word": "razor", + "count": 6 + }, + { + "word": "s", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "'It really means a lot.", + "length": 23 + }, + { + "text": "The tiny dog then took off across a field towards Jazzy in the ditch.", + "length": 69 + }, + { + "text": "Razor began barking wildly last Monday morning, the day after his larger pal left.", + "length": 82 + }, + { + "text": "The dog lover's pitbull Layla also ran away from home last Sunday and is still missing.", + "length": 87 + }, + { + "text": "Dogs are known as man's best friend, but they make sure to look out for their fellow canines as well.", + "length": 101 + }, + { + "text": "Jazzy was then taken out of the mud by eight rescue workers who used backboard straps and a spine board.", + "length": 104 + }, + { + "text": "You don’t really realize how attached you are to your pets until something like this happens,” Mr Chavez said .", + "length": 115 + }, + { + "text": "Worried owner: Tim Chavez also lost his 
pitbull Layla on Sunday, the same day that Jazzy left home and ended up in a ditch .", + "length": 124 + }, + { + "text": "Belen Fire and Rescue's Manny Garcia said that the dogs hind legs were submerged and she was unable to move, according to KOAT.", + "length": 127 + }, + { + "text": "A passer-by driving on the road adjacent to the ditch heard the barking, looked down and saw Razor next to the immobilized Jazzy.", + "length": 129 + }, + { + "text": "Razor's heroics have earned her local fame, and the city council plans to honor the dog with a certificate at the beginning of March.", + "length": 133 + }, + { + "text": "Sticky situation: The dachshund's work led to fire and police workers coming to the irrigation ditch and rescuing Jazzy with a spineboard .", + "length": 139 + }, + { + "text": "Hot dog hero: The St Bernard and Razor were reunited on the shore, and the Belen, New Mexico, city council plans on honoring the small hero .", + "length": 141 + }, + { + "text": "Little Lassie: Razor the dachshund (left) alerted a passing driver to the predicament of St Bernard Jazzy, after the big dog became stuck in a ditch .", + "length": 150 + }, + { + "text": "Owner Tim Chavez believes that the nine-year-old large dog was stuck in a muddy irrigation ditch for 18 hours after she ran away from home, according to KRQE.", + "length": 158 + }, + { + "text": "A tiny dachshund named Razor helped alert a passersby in Belen, New Mexico, to the fact that his 180lb friend Jazzy, a St Bernard, was stuck two feet in the mud.", + "length": 161 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.441754549741745 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:19.392509337Z", + "first_section_created": "2025-12-23T09:34:19.393911393Z", + "last_section_published": "2025-12-23T09:34:19.394113501Z", + "all_results_received": "2025-12-23T09:34:19.465103862Z", + "output_generated": "2025-12-23T09:34:19.46528557Z", + 
"total_processing_time_ms": 72, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 70, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:19.393911393Z", + "publish_time": "2025-12-23T09:34:19.394113501Z", + "first_worker_start": "2025-12-23T09:34:19.394688124Z", + "last_worker_end": "2025-12-23T09:34:19.464168Z", + "total_journey_time_ms": 70, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:19.394701225Z", + "start_time": "2025-12-23T09:34:19.394795729Z", + "end_time": "2025-12-23T09:34:19.394843731Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:19.394915Z", + "start_time": "2025-12-23T09:34:19.395083Z", + "end_time": "2025-12-23T09:34:19.464168Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 69 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:19.394610021Z", + "start_time": "2025-12-23T09:34:19.394688124Z", + "end_time": "2025-12-23T09:34:19.394753027Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:19.394732026Z", + "start_time": "2025-12-23T09:34:19.394793629Z", + "end_time": "2025-12-23T09:34:19.39481823Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 69, + "min_processing_ms": 69, + "max_processing_ms": 69, + "avg_processing_ms": 69, + "median_processing_ms": 69, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1882, + "slowest_section_id": 0, + "slowest_section_time_ms": 70 + } +} diff --git a/data/output/0040765e65e917debb48cbc3a4ae92a14ef6c421.json b/data/output/0040765e65e917debb48cbc3a4ae92a14ef6c421.json new file mode 100644 index 0000000..2f615fc --- /dev/null +++ b/data/output/0040765e65e917debb48cbc3a4ae92a14ef6c421.json @@ -0,0 +1,658 @@ +{ + "file_name": "0040765e65e917debb48cbc3a4ae92a14ef6c421.txt", + "total_words": 1804, + "top_n_words": [ + { + "word": "to", + "count": 54 + }, + { + "word": "the", + "count": 48 + }, + { + "word": "she", + "count": 38 + }, + { + "word": "a", + "count": 37 + }, + { + "word": "of", + "count": 37 + }, + { + "word": "on", + "count": 29 + }, + { + "word": "i", + "count": 28 + }, + { + "word": "and", + "count": 25 + }, + { + "word": "have", + "count": 25 + }, + { + "word": "benefits", + "count": 23 + } + ], + "sorted_sentences": [ + { + "text": "five of us.", + "length": 11 + }, + { + "text": "Yes or No'.", + "length": 11 + }, + { + "text": "'Obviously .", + "length": 12 + }, + { + "text": "15 per week.", + "length": 12 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Street where she lives.", + "length": 23 + }, + { + "text": "I would prefer to be working.", + "length": 29 + }, + { + "text": "and then I have two good days.", + "length": 30 + }, + { + "text": "flexible enough to protect both.", + "length": 32 + }, 
+ { + "text": "During the process, she may have .", + "length": 34 + }, + { + "text": "White Dee could receive up to £100.", + "length": 36 + }, + { + "text": "have signed her off as unfit to work.", + "length": 37 + }, + { + "text": "In order to get her ESA benefits for .", + "length": 38 + }, + { + "text": "As a mother-of-two, she receives £20.", + "length": 38 + }, + { + "text": "Footage courtsey of ITV This Morning .", + "length": 38 + }, + { + "text": "What sort of employer would I ring up .", + "length": 39 + }, + { + "text": "someone else looks after their children.", + "length": 40 + }, + { + "text": "' The mother, who has children Gerrard, .", + "length": 41 + }, + { + "text": "As a mother-of two she is entitled to £33.", + "length": 43 + }, + { + "text": "40 for her second child Gerrard, aged seven.", + "length": 44 + }, + { + "text": "70 child benefit per week, a maximum of £100.", + "length": 46 + }, + { + "text": "provided a GP's note confirming that she is sick.", + "length": 49 + }, + { + "text": "And in reality they could have put more of that out.", + "length": 52 + }, + { + "text": "At the moment I am not in a place where (I can work).", + "length": 53 + }, + { + "text": "At the moment I am not in a place where (I can work).", + "length": 53 + }, + { + "text": "She expects the taxpayer to fund her life on benefits.", + "length": 54 + }, + { + "text": "It looks like you are having a great crack,' she said.", + "length": 54 + }, + { + "text": "'It looks great, it looks like a right barrel of laughs.", + "length": 56 + }, + { + "text": "and say \"sorry I can't come in for three days - I'm low\".", + "length": 57 + }, + { + "text": "It has been criticised by viewers for being unrealistic .", + "length": 57 + }, + { + "text": "It contained benefit fraudsters, shoplifters and drug use.", + "length": 58 + }, + { + "text": "'I think we went into it very naively to be honest,' she said.", + "length": 62 + }, + { + "text": "30 per week for her first child 
16-year-old Caitlin plus £13.", + "length": 62 + }, + { + "text": "Finally she receives ESA, which is paid to people who are sick.", + "length": 63 + }, + { + "text": "She narrowly avoided prison after being caught stealing £13,000.", + "length": 65 + }, + { + "text": "out together, where we all helped each other, when someone is ill, .", + "length": 68 + }, + { + "text": "I suffer with depression and I am being assessed for bipolar disorder.", + "length": 70 + }, + { + "text": "I suffer with depression and I am being assessed for bipolar disorder.", + "length": 70 + }, + { + "text": "we went into it believing it was community spirit - I know people are .", + "length": 71 + }, + { + "text": "seven, and Caitlin, 16, also said that she appeared on Benefits Street .", + "length": 72 + }, + { + "text": "thinking it was going to show off the community spirit in James Turner .", + "length": 72 + }, + { + "text": "'I have a good day, I have three bad days and then I have two good days.", + "length": 72 + }, + { + "text": "depression, White Dee will have been assessed by Atos workers who would .", + "length": 73 + }, + { + "text": "probably sick of hearing us say that now - but we were told that how we .", + "length": 73 + }, + { + "text": "live were how children used to live years ago, where they could all play .", + "length": 74 + }, + { + "text": "Controversy: Critics have questioned whether White Dee is really unable to work.", + "length": 80 + }, + { + "text": "15 ESA payments because she is 'depressed', plus £115 per week child tax credits.", + "length": 82 + }, + { + "text": "However, mental health charities have said that depression is not always obvious .", + "length": 82 + }, + { + "text": "Television appearance: White Dee has made TV appearances but says she was not paid .", + "length": 84 + }, + { + "text": "'Our own experience at SANE is that most people with depression really want to work.", + "length": 84 + }, + { + "text": "However, this can be reduced if 
the individual does not attend support group sessions.", + "length": 86 + }, + { + "text": "‘The only person who should be depressed is the taxpayer who has to pick up the bill.", + "length": 87 + }, + { + "text": "She said in a recent interview on Channel 5: 'I haven't been on benefits my whole life.", + "length": 87 + }, + { + "text": "' Benefits Street: James Turner Street in Birmingham where the Channel 4 show was filmed.", + "length": 89 + }, + { + "text": "White Dee admitted that she receives 'about £200 per week' in state handouts to live off.", + "length": 90 + }, + { + "text": "' Conservative MP Philip Davies accused White Dee of not being interested in finding a job.", + "length": 91 + }, + { + "text": "What sort of employer would I ring up and say \"sorry I can't come in for three days - I'm low\".", + "length": 95 + }, + { + "text": "Questions: Following the appearance Dee was accused of being 'bone idle' by a Conservative MP .", + "length": 95 + }, + { + "text": "They (Channel 4) were let into the house when I was sleeping, so they did see both sides of it.", + "length": 95 + }, + { + "text": "One of the questions on the form asks: 'Do you know when you will be well enough to work again?", + "length": 95 + }, + { + "text": "Anger: White Dee, from Channel 4's Benefits Street, appears on a live debate on Monday evening .", + "length": 96 + }, + { + "text": "During a recent television interview on Channel 5, she said: 'I haven't been on benefits my whole life.", + "length": 103 + }, + { + "text": "The 42-year-old appeared to have had her hair coloured and put make-up on before appearing on the show.", + "length": 103 + }, + { + "text": "She is also eligible to receive £5,995 per year in child tax credits - which works out as £115 per week.", + "length": 106 + }, + { + "text": "The money she receives consists of child tax credits, child benefit and Employment Support Allowance (ESA).", + "length": 107 + }, + { + "text": "'Obviously it is just a programme 
that they put together into 40 minutes that they had spent 18 months filming.", + "length": 111 + }, + { + "text": "Questions: One of the questions on the 56-page form is 'do you know when you will be well enough to work again?", + "length": 111 + }, + { + "text": "'Sanctions are used as a last resort, but it’s only right that there is a penalty if people fail to play by them.", + "length": 115 + }, + { + "text": "‘White Dee is bone idle and doesn’t want to work another day in her life and has no intention of finding a job.", + "length": 115 + }, + { + "text": "White Dee has insisted that depression means she cannot work - because she would be forced to take regular days off sick.", + "length": 121 + }, + { + "text": "Show stars: White Dee puts her arm round fellow James Turner Street resident Tich during a benefits debate on Channel 4 .", + "length": 122 + }, + { + "text": "Claim form: White Dee will have to have filled out an 56-page form like this before she can claim benefits for depression .", + "length": 123 + }, + { + "text": "' The single mother - who chewed gum during the live show - added that on-screen life on handouts was made to look like fun.", + "length": 124 + }, + { + "text": "Benefits row: Tory MP Philip Davies today accused White Dee of being 'bone idle' 'I have a good day, I have three bad days .", + "length": 124 + }, + { + "text": "In order to qualify for the ESA, she would have had to fill out A 56-page form giving full details of her medical condition.", + "length": 124 + }, + { + "text": "The 42-year-old insists that she has been unable to work since her mother died because she has been suffering from depression.", + "length": 126 + }, + { + "text": "TV appearances: White Dee has been tipped to appear on Celebrity Big Brother after 'starring' on Channel 4's Benefits Street .", + "length": 126 + }, + { + "text": "Tough new rules were introduced in October 2012 as ministers made it clear what claimants had to do in return for their 
benefits.", + "length": 129 + }, + { + "text": "' She added: 'It started off as a bereavement depression when I lost my mum - the most important person in my life - was just gone.", + "length": 131 + }, + { + "text": "' Marjorie Wallace, chief executive of the mental health charity SANE, said that people with depression have good days and bad days.", + "length": 132 + }, + { + "text": "The Benefits Street matriarch has been living off handouts after being sacked from her job as a council administrator seven years ago.", + "length": 134 + }, + { + "text": "There have also been rumours that she is being lined up to appear on Celebrity Big Brother which will net her a minimum fee of £60,000.", + "length": 136 + }, + { + "text": "She said: 'It's a horrible stigma, depression - just because you suffer from depression, you don't suffer from it twenty four hours a day.", + "length": 138 + }, + { + "text": "Jobseeker's Allowance payments suspended by the government because they are not doing enough to look for work, new figures revealed today.", + "length": 138 + }, + { + "text": "She appeared on television today after accusing Channel 4 of 'manipulating' her and her neighbours during a live television debate on Monday.", + "length": 141 + }, + { + "text": "White Dee - whose real name is Deirdre Kelly - went on the show just two days after she appeared on Channel 4's Benefits Britain: The Live Debate.", + "length": 146 + }, + { + "text": "Speaking on This Morning today, she said that although she appeared jovial on screen, Channel 4 did not do enough to show how bad her condition is.", + "length": 147 + }, + { + "text": "But they are simply unable to take the stress of a full working week to overcome the exhaustion and debilitation which are symptoms of the illness.", + "length": 147 + }, + { + "text": "' Claimants are required to give brief details of their illness plus provide contact details for the doctor who signs off their medical statements.", + "length": 147 + }, + 
{ + "text": "Once the form is submitted, White Dee will have been assessed by workers from Atos who would have ruled that she is unfit to work because of her depression.", + "length": 156 + }, + { + "text": "The unemployed single mother-of-two was accused of being 'bone idle' today after she was interviewed by Eamonn Holmes and Ruth Langsford on ITV's This Morning.", + "length": 159 + }, + { + "text": "A Tory MP has questioned how Benefits Street 'star' White Dee can make repeated live television appearances while she is unable to work because she is depressed.", + "length": 161 + }, + { + "text": "'White Dee is right that this makes it exceptionally difficult for employers and potential employees who may not be able to fulfil the demands of a full working week.", + "length": 166 + }, + { + "text": "'As part of the Government’s long-term economic plan, we are ending the something for nothing culture and supporting those who want to work hard and play by the rules.", + "length": 169 + }, + { + "text": "Payments have been suspended a total of 818,000 times since then if claimants failed to attend an appointment, rejected a job offer or were not doing enough to find work.", + "length": 170 + }, + { + "text": "’ Mr Davies spoke out today as separate figures revealed 800,000 people on Jobseeker's Allowance have had their benefits suspended because they had not done enough to find work.", + "length": 179 + }, + { + "text": "Benefits Street 'star': Street matriarch White Dee, pictured with her daughter Caitlin, 16, who has been tipped to star on Celebrity Big Brother after her appearances on Channel 4 .", + "length": 181 + }, + { + "text": "‘If the people from Benefits Street are going to make these public appearances hopefully they are going to be paid for it so the taxpayer can get some of their money back,' he said.", + "length": 183 + }, + { + "text": "' White Dee has revealed she receives around £200 a week in handouts which consist of Employment Support Allowance 
(ESA) because of her condition, child benefit and child tax credits.", + "length": 184 + }, + { + "text": "'Mental illnesses like depression can fluctuate from day to day and the current way people are being assessed for benefits takes no account of the reality of these conditions,' she said.", + "length": 186 + }, + { + "text": "‘I think every time people look at White Dee make an appearance hopefully it will serve as a reminder to people of the mess the benefits system is in and how badly Iain Duncan Smith’s reforms are needed.", + "length": 207 + }, + { + "text": "She said: 'You (Channel 4) spent up to 18 months, up to two years, to film people that were working, you filmed old age pensioners, you came to parties, you filmed open days, community spirit and boom (you showed)...", + "length": 216 + }, + { + "text": "' 'People who are in a job know that if they don’t play by the rules or fail to turn up in the morning, there might be consequences, so it’s only right that people on benefits should have similar responsibilities.", + "length": 217 + }, + { + "text": "Work and Pensions Secretary Iain Duncan Smith said: 'This Government has always been clear that in return for claiming unemployment benefits jobseekers have a responsibility to do everything they can to get back into work.", + "length": 222 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7080699602762858 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:19.8953048Z", + "first_section_created": "2025-12-23T09:34:19.895704516Z", + "last_section_published": "2025-12-23T09:34:19.896282739Z", + "all_results_received": "2025-12-23T09:34:20.032792941Z", + "output_generated": "2025-12-23T09:34:20.032978149Z", + "total_processing_time_ms": 137, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 136, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:19.895704516Z", + 
"publish_time": "2025-12-23T09:34:19.895920525Z", + "first_worker_start": "2025-12-23T09:34:19.896426445Z", + "last_worker_end": "2025-12-23T09:34:19.992756Z", + "total_journey_time_ms": 97, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:19.896656955Z", + "start_time": "2025-12-23T09:34:19.896711257Z", + "end_time": "2025-12-23T09:34:19.896819061Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:19.896908Z", + "start_time": "2025-12-23T09:34:19.89707Z", + "end_time": "2025-12-23T09:34:19.992756Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 95 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:19.896636154Z", + "start_time": "2025-12-23T09:34:19.896712057Z", + "end_time": "2025-12-23T09:34:19.89729468Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:19.896361743Z", + "start_time": "2025-12-23T09:34:19.896426445Z", + "end_time": "2025-12-23T09:34:19.896478347Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:19.895940526Z", + "publish_time": "2025-12-23T09:34:19.896028129Z", + "first_worker_start": "2025-12-23T09:34:19.896657655Z", + "last_worker_end": "2025-12-23T09:34:20.02038Z", + "total_journey_time_ms": 124, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:19.896579851Z", + "start_time": "2025-12-23T09:34:19.896657655Z", + "end_time": "2025-12-23T09:34:19.896759159Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:19.896841Z", + "start_time": "2025-12-23T09:34:19.896996Z", + "end_time": "2025-12-23T09:34:20.02038Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 123 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:19.896643754Z", + "start_time": "2025-12-23T09:34:19.896709657Z", + "end_time": "2025-12-23T09:34:19.896812261Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:19.896631754Z", + "start_time": "2025-12-23T09:34:19.896688756Z", + "end_time": "2025-12-23T09:34:19.896754158Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 2, + "creation_time": "2025-12-23T09:34:19.896081031Z", + "publish_time": "2025-12-23T09:34:19.896282739Z", + "first_worker_start": "2025-12-23T09:34:19.896600152Z", + "last_worker_end": "2025-12-23T09:34:20.031927Z", + "total_journey_time_ms": 135, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:19.897072771Z", + "start_time": "2025-12-23T09:34:19.897116873Z", + "end_time": "2025-12-23T09:34:19.897119373Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:19.995361Z", + "start_time": "2025-12-23T09:34:19.99547Z", + "end_time": "2025-12-23T09:34:20.031927Z", + "queue_wait_time_ms": 99, + "processing_time_ms": 36 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:19.89752859Z", + "start_time": "2025-12-23T09:34:19.89754089Z", + "end_time": "2025-12-23T09:34:19.89754289Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:19.896589352Z", + "start_time": "2025-12-23T09:34:19.896600152Z", + "end_time": "2025-12-23T09:34:19.896601952Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 3, + "total_processing_ms": 0, + "min_processing_ms": 0, + 
"max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 3, + "total_processing_ms": 254, + "min_processing_ms": 36, + "max_processing_ms": 123, + "avg_processing_ms": 84, + "median_processing_ms": 95, + "total_queue_wait_ms": 100, + "avg_queue_wait_ms": 33 + }, + "topn": { + "worker_type": "topn", + "total_sections": 3, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 3, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 3, + "average_section_size": 3341, + "slowest_section_id": 2, + "slowest_section_time_ms": 135 + } +} diff --git a/data/output/0040eb79bd5424f52e1ec43050539e4e12189cbf.json b/data/output/0040eb79bd5424f52e1ec43050539e4e12189cbf.json new file mode 100644 index 0000000..88c3b34 --- /dev/null +++ b/data/output/0040eb79bd5424f52e1ec43050539e4e12189cbf.json @@ -0,0 +1,508 @@ +{ + "file_name": "0040eb79bd5424f52e1ec43050539e4e12189cbf.txt", + "total_words": 1622, + "top_n_words": [ + { + "word": "the", + "count": 84 + }, + { + "word": "of", + "count": 51 + }, + { + "word": "to", + "count": 43 + }, + { + "word": "and", + "count": 42 + }, + { + "word": "warhol", + "count": 40 + }, + { + "word": "s", + "count": 35 + }, + { + "word": "in", + "count": 31 + }, + { + "word": "he", + "count": 29 + }, + { + "word": "a", + "count": 28 + }, + { + "word": "his", + "count": 23 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "is all about.", + "length": 13 + }, + { + "text": "\"His work lives on.", + "length": 19 + 
}, + { + "text": "you have no privacy.", + "length": 20 + }, + { + "text": "Warhol goes to China .", + "length": 22 + }, + { + "text": "Censoring Mao in China .", + "length": 24 + }, + { + "text": "\"He was all about multiples...", + "length": 30 + }, + { + "text": "Can Warhol make a name in China?", + "length": 32 + }, + { + "text": "\" \"Andy was the ultimate pop artist.", + "length": 36 + }, + { + "text": "I think Warhol is a perfect artist...", + "length": 37 + }, + { + "text": "\" Influence on Chinese contemporary art .", + "length": 41 + }, + { + "text": "\" CNN's Feng Ke contributed to this report.", + "length": 43 + }, + { + "text": "that nothing would put the show in jeopardy.", + "length": 44 + }, + { + "text": "\" wrote user @Jianisi_yangyang on Sina Weibo.", + "length": 45 + }, + { + "text": "Notoriety and fame is a double-edged sword....", + "length": 46 + }, + { + "text": "He described Mao as \"classic Warhol subject matter.", + "length": 51 + }, + { + "text": "\"If you don't know who Andy Warhol is, I won't blame you.", + "length": 57 + }, + { + "text": "\"As Andy would say, he didn't have to wear his Andy suit.", + "length": 57 + }, + { + "text": "For example, I didn't understand the Campbell's soup cans.", + "length": 58 + }, + { + "text": "and at the time, China was the ultimate multiple,\" Makos said.", + "length": 62 + }, + { + "text": "So far, it appears that this education is welcome -- and necessary.", + "length": 67 + }, + { + "text": "His trip to Beijing was an unexpected byproduct of a visit to Hong Kong.", + "length": 72 + }, + { + "text": "president Richard Nixon's rapprochement with the communist power in 1972.", + "length": 73 + }, + { + "text": "His series of portraits went on to become some of his most well-known works.", + "length": 76 + }, + { + "text": "Ai once spotted Warhol at a party, but did not approach him, Shiner revealed.", + "length": 77 + }, + { + "text": "\"As long as that imagery is live and well, Warhol will have this 
built-in publicity.", + "length": 84 + }, + { + "text": "The country also provided a source of inspiration for Warhol's nascent modeling career.", + "length": 87 + }, + { + "text": "The Shanghai exhibition will run to July 28 and make its way to Beijing later this year.", + "length": 88 + }, + { + "text": "\"Knowing that we would have the censors from the Ministry of Culture, we wanted to make sure...", + "length": 95 + }, + { + "text": "Artistic inspiration aside, China also provided Warhol with a respite from the pressures of fame.", + "length": 97 + }, + { + "text": "Upon Warhol's arrival, Siu announced he had arranged a VIP tour to Beijing for him and his friends.", + "length": 99 + }, + { + "text": "\" A staff member told CNN that government authorities would have considered the works \"too political.", + "length": 101 + }, + { + "text": "\"We've seen a great deal of interest and curiosity (among Chinese) about Western art and international culture.", + "length": 111 + }, + { + "text": "\" Unfortunately, neither artist became acquainted in person with their muse, despite moving to New York for him.", + "length": 112 + }, + { + "text": "But they are much less likely to connect the work with the artist -- or to even have heard of the artist himself.", + "length": 113 + }, + { + "text": "He recalled that Warhol went virtually unrecognized in China, although the artist stood out for his unusual looks.", + "length": 114 + }, + { + "text": "However, visitors to the \"15 Minutes External\" exhibitions in mainland China will not see any Chairman Mao portraits.", + "length": 117 + }, + { + "text": "\" Ai's similarity to Warhol also lies in his social activism, which aims to change Chinese society through art, he added.", + "length": 121 + }, + { + "text": "But if you say you've never seen his Marilyn Monroe portrait, I would have to jump into the Huangpu river and kill myself!", + "length": 122 + }, + { + "text": "Once the Chinese public gains a deeper understanding of 
Warhol's work, he expects that the Mao works \"won't be as big a deal.", + "length": 125 + }, + { + "text": "To this day you can still find Campbell soup on the shelf in the grocery store and you can see multiples of them,\" Makos said.", + "length": 126 + }, + { + "text": "\" China's communist uniformity, with its blue sea of unisex Mao suits, appealed to Warhol's aesthetic obsession with repetition.", + "length": 128 + }, + { + "text": "Little did he know that he would eventually pose for a photo in front of the original portrait hanging in Beijing's Tiananmen Square.", + "length": 133 + }, + { + "text": "Maybe (the Chinese) don't know him, but they know his work,\" Makos said, predicting that Warhol \"will get bigger and bigger in China.", + "length": 133 + }, + { + "text": "When you look at his Coca-Cola works, that's directly related to Warhol and it's really amazing how many things he picked up from Andy.", + "length": 135 + }, + { + "text": "Ripping from the headlines, Warhol adopted Chairman Mao as his subject, applying his signature pop aesthetic to China's paramount leader.", + "length": 137 + }, + { + "text": "\" Warhol relied on a copy of Mao's portrait photograph in the leader's Little Red Book of ideological quotations to create his paintings.", + "length": 137 + }, + { + "text": "A search on China's popular Twitter-like platform revealed many posts by users expressing ignorance of whom Warhol was or why he is famous.", + "length": 139 + }, + { + "text": "\"As a young man, he was too shy to actually go and say hello,\" he said, recalling that Ai told him his English wasn't good enough at the time.", + "length": 142 + }, + { + "text": "\"It was one of the special places,\" said Christopher Makos, the artist's close friend and personal photographer, who accompanied him to China.", + "length": 142 + }, + { + "text": "While Shiner acknowledged the Mao portraits \"could be read as a sarcastic or ironic portrayal\", he said Warhol \"definitely wasn't being 
critical.", + "length": 145 + }, + { + "text": "Meanwhile, Makos will also hold an exhibition of his photographs of Warhol next month in Shanghai, including images from their 1982 trip to China.", + "length": 146 + }, + { + "text": "\" \"Of course, the primary concern is to get the show there and up and not put anything in a category that would ever question anything,\" Shiner said.", + "length": 149 + }, + { + "text": "He always liked to blur the lines on gender, and making colorful men somewhat beautiful was something that he liked to do as an inside joke,\" he added.", + "length": 151 + }, + { + "text": "Hong Kong (CNN) -- When American pop artist Andy Warhol visited Beijing in 1982 and was told there wasn't a McDonald's, he replied: \"Oh, but they will.", + "length": 151 + }, + { + "text": "Having recently launched a \"massive\" advertising campaign and sat for dozens of interviews with mainland media outlets, Shiner is hoping to reach the masses.", + "length": 157 + }, + { + "text": "\" Christie's first private Warhol sale in Hong Kong last November attracted a mostly Asian demographic and managed to sell nearly half of its lots, Good said.", + "length": 158 + }, + { + "text": "While Warhol's trip to Beijing was his first and only visit to mainland China, his engagement with the country started a decade earlier, inspired by former U.", + "length": 158 + }, + { + "text": "Warhol's influence on Chinese contemporary art can actually be traced back to 1981, when many contemporary artists, labeled as dissidents, fled the country, Shiner said.", + "length": 169 + }, + { + "text": "Many Chinese are familiar with certain Warhol works, such as the Marilyn Monroe or the Chairman Mao portraits, reproductions of which dot cafes and tourist markets across Beijing.", + "length": 179 + }, + { + "text": "While Shiner was planning the exhibition with the host venue -- the Shanghai Power Station of Art -- its staff advised that exhibiting the Mao works wasn't a \"good idea 
right now.", + "length": 179 + }, + { + "text": "Both artists have gone on to become some of the most recognized and celebrated names in Chinese contemporary art, and some would go as far as calling Ai Weiwei \"China's Andy Warhol.", + "length": 181 + }, + { + "text": "While most of them went to Paris and Berlin, two artists \"very specifically went to New York because they wanted quite literally to be part of Andy's universe\" -- Ai Weiwei and Xu Bing.", + "length": 185 + }, + { + "text": "\"Mao was front-page news in America and that was often where Warhol got his biggest inspiration,\" said Eric Shiner, director of Pittsburgh's Andy Warhol Museum, which organized the exhibition.", + "length": 192 + }, + { + "text": "Shiner readily concurred: \"He's really gone on to model his entire art-making process and career on proven Warhol tactics, looking at repetition, multiplication, and critique of consumer culture.", + "length": 195 + }, + { + "text": "While Warhol is well-known within art and fashion circles in China (Shiner said 600 of these cultural elite attended the exhibition's pre-opening), he remains unknown to the average Chinese citizen.", + "length": 198 + }, + { + "text": "\" Twenty-six years after his death, Warhol, whose much-lauded prescience extended across visual and consumer culture, has popped up in China once again -- and he was right about the fast-food chain.", + "length": 198 + }, + { + "text": "\"For the first time, I learned the charm of pop art,\" Weibo user @Yanmingdu wrote about the exhibition, while user @GracieMankedun posted, \"Just saw Andy Warhol's exhibition and I got a little confused.", + "length": 202 + }, + { + "text": "The industrialist Alfred Siu had invited him to the city to attend the opening of a night club, decorated with portraits of Britain's Prince Charles and Princess Diana that he had commissioned from the artist.", + "length": 209 + }, + { + "text": "Warhol posed for Makos' camera with gestures he adopted from the tai 
chi practitioners he observed outdoors -- and even adopted the bared-teeth expression of the guardian lion in the Forbidden City in one photo.", + "length": 211 + }, + { + "text": "\" \"The curiosity is greater than the awareness,\" said John Good, international director for post-war and contemporary art at Christie's, which is holding its second private sale of Warhol's work in Hong Kong this week.", + "length": 218 + }, + { + "text": "\"One of the reasons why I wanted to do this show is so the general public can learn about the artist behind these iconic works and realize (Mao and Marilyn Monroe) are just a few of thousands of images he made,\" he said.", + "length": 220 + }, + { + "text": "Ai and Xu aside, the Warhol aesthetic and vocabulary has deeply influenced Chinese contemporary artists over the past 10-15 years, with its characteristic combinations of social realist imagery with pop culture and iconic brands.", + "length": 229 + }, + { + "text": "\" An editorial in the state-backed Global Times newspaper suggested that while Warhol may not have had ill intent, the \"provocative\" blotches of color splattered on Mao's face suggested that he was wearing make-up -- a disrespectful portrayal of the iconic leader.", + "length": 264 + }, + { + "text": "\"Andy Warhol: 15 Minutes Eternal,\" the first major retrospective of his work in China, recently arrived in Shanghai with the aim of acquainting the Chinese public with the artist who created some of the most famous paintings of the most iconic figure in the country's history.", + "length": 276 + }, + { + "text": "As for Xu Bing, viewers may not immediately see Warhol in his work, Shiner said, but he described the artist as a \"huge fan of Warhol\" who \"loves the idea of repetition -- the formal arrangement of Chinese character after Chinese character, an endless array of similar looking imagery.", + "length": 285 + }, + { + "text": "\" \"Ai Weiwei loves the idea of multiples,\" Makos pointed out, noting Ai's 
most famous installations, including the 9,000 backpacks representing the schoolchildren killed in the 2008 Sichuan earthquake, and the millions of porcelain sunflower seeds he poured into the Turbine Hall of London's Tate Modern museum.", + "length": 311 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4232131242752075 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:20.397081222Z", + "first_section_created": "2025-12-23T09:34:20.397435237Z", + "last_section_published": "2025-12-23T09:34:20.39775915Z", + "all_results_received": "2025-12-23T09:34:20.528977138Z", + "output_generated": "2025-12-23T09:34:20.52927735Z", + "total_processing_time_ms": 132, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 131, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:20.397435237Z", + "publish_time": "2025-12-23T09:34:20.397630845Z", + "first_worker_start": "2025-12-23T09:34:20.398312472Z", + "last_worker_end": "2025-12-23T09:34:20.52811Z", + "total_journey_time_ms": 130, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:20.398372675Z", + "start_time": "2025-12-23T09:34:20.398459978Z", + "end_time": "2025-12-23T09:34:20.398580883Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:20.398634Z", + "start_time": "2025-12-23T09:34:20.398799Z", + "end_time": "2025-12-23T09:34:20.52811Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 129 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:20.398241069Z", + "start_time": "2025-12-23T09:34:20.398312472Z", + "end_time": "2025-12-23T09:34:20.398415876Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:20.398300672Z", + 
"start_time": "2025-12-23T09:34:20.398374875Z", + "end_time": "2025-12-23T09:34:20.398420276Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:20.397661546Z", + "publish_time": "2025-12-23T09:34:20.39775915Z", + "first_worker_start": "2025-12-23T09:34:20.398299672Z", + "last_worker_end": "2025-12-23T09:34:20.463284Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:20.398422977Z", + "start_time": "2025-12-23T09:34:20.398471879Z", + "end_time": "2025-12-23T09:34:20.398546282Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:20.398782Z", + "start_time": "2025-12-23T09:34:20.398915Z", + "end_time": "2025-12-23T09:34:20.463284Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:20.398536981Z", + "start_time": "2025-12-23T09:34:20.398635185Z", + "end_time": "2025-12-23T09:34:20.39875419Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:20.398229569Z", + "start_time": "2025-12-23T09:34:20.398299672Z", + "end_time": "2025-12-23T09:34:20.398385275Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 193, + "min_processing_ms": 64, + "max_processing_ms": 129, + "avg_processing_ms": 96, + "median_processing_ms": 129, + "total_queue_wait_ms": 
2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4722, + "slowest_section_id": 0, + "slowest_section_time_ms": 130 + } +} diff --git a/data/output/0040ec9845ce9a6d80dc9380aa2f6cff60e3f186.json b/data/output/0040ec9845ce9a6d80dc9380aa2f6cff60e3f186.json new file mode 100644 index 0000000..69baf9a --- /dev/null +++ b/data/output/0040ec9845ce9a6d80dc9380aa2f6cff60e3f186.json @@ -0,0 +1,226 @@ +{ + "file_name": "0040ec9845ce9a6d80dc9380aa2f6cff60e3f186.txt", + "total_words": 341, + "top_n_words": [ + { + "word": "the", + "count": 21 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "button", + "count": 9 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "facebook", + "count": 7 + }, + { + "word": "of", + "count": 7 + }, + { + "word": "from", + "count": 6 + }, + { + "word": "on", + "count": 6 + }, + { + "word": "that", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "This new one allows for more passive aggression.", + "length": 48 + }, + { + "text": "Facebook insists it’s not about passive aggression, it’s about tidiness.", + "length": 76 + }, + { + "text": "Facebook began rolling out the ‘Unfollow’ button and a related change to its users on Monday.", + "length": 97 + }, + { + "text": "‘This means you are still friends, but updates from that person won't appear in 
your News Feed.", + "length": 97 + }, + { + "text": "The latter alternative severs ties with that person on the social network, without notifying them.", + "length": 98 + }, + { + "text": "The button may be accessed by clicking a dropdown menu beside a friend’s recent update in your newsfeed.", + "length": 106 + }, + { + "text": "When clicked, the selected update and all future correspondences from that person will placed out of sight and, thus, out of mind.", + "length": 130 + }, + { + "text": "Dislike: When you've read one too many updates from a Facebook friend you can't stand, you may now mute them with the Unfollow button .", + "length": 135 + }, + { + "text": "The goal of this change is to help people curate their newsfeed and see more of the content that they care about,’ Facebook told Reuters.", + "length": 139 + }, + { + "text": "Like the previous button, it gives users the option of blocking content from certain people without offending them, say through de-friending.", + "length": 141 + }, + { + "text": "Facebook has replaced its Hide All button with a more to-the-point ‘Unfollow,’ letting users block all messages and posts from selected friends.", + "length": 148 + }, + { + "text": "Never do this: For demonstration purposes only, here is where to find the Unfollow button--which lets you mute annoying friends--on your Facebook News Feed .", + "length": 157 + }, + { + "text": "‘The goal of this change is to help people curate their News Feed and see more of the content that they care about,’ a Facebook spokesperson told NBC News.", + "length": 159 + }, + { + "text": "It added a ‘Following’ button next to the usual ‘Like’ button on a page or next to the ‘Friends’ button on a personal timeline, which will also enable users to block posts.", + "length": 184 + }, + { + "text": "The world's largest social network is constantly tweaking its newsfeed - the main page users look at on the network - often by reducing clutter, especially from advertising, and 
bringing to the surface or revealing the posts deemed most relevant to any particular user.", + "length": 269 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5107213854789734 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:20.898922048Z", + "first_section_created": "2025-12-23T09:34:20.899205059Z", + "last_section_published": "2025-12-23T09:34:20.899367465Z", + "all_results_received": "2025-12-23T09:34:20.960485129Z", + "output_generated": "2025-12-23T09:34:20.960627934Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:20.899205059Z", + "publish_time": "2025-12-23T09:34:20.899367465Z", + "first_worker_start": "2025-12-23T09:34:20.899927888Z", + "last_worker_end": "2025-12-23T09:34:20.95953Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:20.899917588Z", + "start_time": "2025-12-23T09:34:20.899988991Z", + "end_time": "2025-12-23T09:34:20.900038293Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:20.900177Z", + "start_time": "2025-12-23T09:34:20.900329Z", + "end_time": "2025-12-23T09:34:20.95953Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:20.899943989Z", + "start_time": "2025-12-23T09:34:20.900004891Z", + "end_time": "2025-12-23T09:34:20.900052293Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:20.899878486Z", + "start_time": "2025-12-23T09:34:20.899927888Z", + "end_time": "2025-12-23T09:34:20.899954589Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } 
+ } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2025, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/0040f73df17ba56d766a1d67317c8dfa63dcb5e6.json b/data/output/0040f73df17ba56d766a1d67317c8dfa63dcb5e6.json new file mode 100644 index 0000000..868848b --- /dev/null +++ b/data/output/0040f73df17ba56d766a1d67317c8dfa63dcb5e6.json @@ -0,0 +1,302 @@ +{ + "file_name": "0040f73df17ba56d766a1d67317c8dfa63dcb5e6.txt", + "total_words": 830, + "top_n_words": [ + { + "word": "the", + "count": 42 + }, + { + "word": "a", + "count": 40 + }, + { + "word": "in", + "count": 33 + }, + { + "word": "is", + "count": 19 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "it", + "count": 17 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "for", + "count": 15 + }, + { + "word": "has", + "count": 15 + }, + { + "word": "property", + "count": 14 + } + ], + 
"sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "But I knew I had to.", + "length": 20 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Emma Glanfield for MailOnline .", + "length": 31 + }, + { + "text": "We have already had a lot of interest in it though.", + "length": 51 + }, + { + "text": "'I don't think we have had anything as small as this before.", + "length": 60 + }, + { + "text": "The current owner said: 'My friends said I was mad to buy it.", + "length": 61 + }, + { + "text": "'It a fun little place but it is only suitable for a single person.", + "length": 67 + }, + { + "text": "This is around £450 per month less than the average monthly rent paid in the UK.", + "length": 81 + }, + { + "text": "The Round House is a pint-sized dwelling described by estate agents as 'quaint and quirky'.", + "length": 91 + }, + { + "text": "Another tiny property in the capital is a one-bedroom house in Denmark Hill, south-east London.", + "length": 95 + }, + { + "text": "It has now once again been put on the market and will go up for auction in the capital on October 1.", + "length": 100 + }, + { + "text": "One of Britain's smallest detached homes has been put up for rent near the town of Biggar in Scotland.", + "length": 103 + }, + { + "text": "Set over five levels - a lower ground, ground, first, second and third floor – it has just 1,000sq ft of space.", + "length": 113 + }, + { + "text": "The slim London house was built in a driveway in 1996 by a keen entrepreneur who wanted to make some extra income.", + "length": 114 + }, + { + "text": "Shirley Kenyon, a lettings co-ordinator, said: 'It's a fun little place but it is only suitable for a single person.", + "length": 116 + }, + { + "text": "It has been owned by him ever since until it was put up for sale and snapped up by a property developer three months ago.", + "length": 121 + }, + { + "text": "I'm confident there are quirky people who will want to buy it - 
as it's cheaper than many one-bedroom homes in this area.", + "length": 121 + }, + { + "text": "' The narrowest house in London is believed to be a property on Goldhawk Road, Shepherds Bush, which stands just over 5ft wide.", + "length": 127 + }, + { + "text": "Put on the market for £450,000 earlier this year, the two-storey end of terrace house is just 8ft wide and has just 466 sq ft of space.", + "length": 136 + }, + { + "text": "Set in the grounds of a private estate, the circular home has a small entrance hall, one bedroom, a living room/kitchen and shower room.", + "length": 136 + }, + { + "text": "The unusual property in Symington, near Biggar in South Lanarkshire, has been put on the rental market for just £375 per calendar month.", + "length": 137 + }, + { + "text": "The skinny two-storey home, which is one of Britain's narrowest properties, boasts two bedrooms, a kitchen, reception room and a bathroom.", + "length": 138 + }, + { + "text": "However the building - known as The Wedge - does spread to 22ft wide as it moves back from the road, which has an average property value of £63,000.", + "length": 149 + }, + { + "text": "This two-storey Victorian terrace (far right) near London's Olympic Park went up for sale for more than £200,000 in 2012, despite being just 7ft wide .", + "length": 152 + }, + { + "text": "Shirley Kenyon, lettings co-ordinator at CKD Galbraith, in Galashiels, said: 'It has a tiny entrance hall and there is a living room, bedroom and en suite.", + "length": 155 + }, + { + "text": "' The property, described as 'quaint and quirky' by estate agents, has a small entrance hall, one bedroom, a living room and kitchen area and a shower room .", + "length": 158 + }, + { + "text": "One of the other narrowest homes in London is this one-bedroom property in Denmark Hill, south-east London which is just 8ft wide and has just 466 sq ft of space .", + "length": 163 + }, + { + "text": "The super-slim two-bedroom terraced house, situated in Haringey, 
north London, is just 7ft wide and was built in a former driveway by a keen entrepreneur in 1996 .", + "length": 163 + }, + { + "text": "The narrowest property in Britain is a terraced home on the Isle of Cumbrae, off the Ayrshire coast in Scotland, which has a front facade measuring just 3ft wide .", + "length": 164 + }, + { + "text": "The Round House is a pint-sized dwelling in Symington, near Biggar in South Lanarkshire, which has been put on the rental market for just £375 per calendar month .", + "length": 164 + }, + { + "text": "The skinny property, which is one of Britain's narrowest homes, boasts two bedrooms, a kitchen, reception room and a bathroom despite being no bigger than 7ft wide .", + "length": 165 + }, + { + "text": "However, the narrowest property in the UK is a terraced home on the Isle of Cumbrae, off the Ayrshire coast in Scotland, which has a front facade measuring just 3ft wide.", + "length": 170 + }, + { + "text": "A super-slim two-bedroom terraced house which is just 7ft wide has been put on the market for £235,000 – the same price as a four-bedroom detached home elsewhere in the country.", + "length": 180 + }, + { + "text": "Situated in Haringey, north London, the property is due to be auctioned next month with a starting price of £235,000 which is the same cost of a property twice the size currently on offer in Burnley, Lancashire.", + "length": 212 + }, + { + "text": "The current owner, a property developer, said: 'I'm confident there are quirky people who will want to buy it - as it's cheaper than many one-bedroom homes in this area' The property will go up for auction in London on October 1 with a starting price of £235,000 - the same cost as a four-bedroom house currently up for sale in Burnley .", + "length": 339 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5169294476509094 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:21.399572825Z", + "first_section_created": 
"2025-12-23T09:34:21.401792114Z", + "last_section_published": "2025-12-23T09:34:21.402015623Z", + "all_results_received": "2025-12-23T09:34:21.467391958Z", + "output_generated": "2025-12-23T09:34:21.467561765Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:21.401792114Z", + "publish_time": "2025-12-23T09:34:21.402015623Z", + "first_worker_start": "2025-12-23T09:34:21.402630648Z", + "last_worker_end": "2025-12-23T09:34:21.46645Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:21.402624748Z", + "start_time": "2025-12-23T09:34:21.402703451Z", + "end_time": "2025-12-23T09:34:21.402800655Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:21.40289Z", + "start_time": "2025-12-23T09:34:21.403062Z", + "end_time": "2025-12-23T09:34:21.46645Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:21.402543444Z", + "start_time": "2025-12-23T09:34:21.402630648Z", + "end_time": "2025-12-23T09:34:21.402749353Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:21.402619747Z", + "start_time": "2025-12-23T09:34:21.402715851Z", + "end_time": "2025-12-23T09:34:21.402760053Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + 
"worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4465, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/004135e34c22bc5c1954f0cfa3c627943486d0b9.json b/data/output/004135e34c22bc5c1954f0cfa3c627943486d0b9.json new file mode 100644 index 0000000..9038b3c --- /dev/null +++ b/data/output/004135e34c22bc5c1954f0cfa3c627943486d0b9.json @@ -0,0 +1,302 @@ +{ + "file_name": "004135e34c22bc5c1954f0cfa3c627943486d0b9.txt", + "total_words": 733, + "top_n_words": [ + { + "word": "the", + "count": 46 + }, + { + "word": "of", + "count": 33 + }, + { + "word": "news", + "count": 22 + }, + { + "word": "to", + "count": 22 + }, + { + "word": "and", + "count": 16 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "police", + "count": 16 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "that", + "count": 10 + }, + { + "word": "was", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "K.", + "length": 2 + }, + { + "text": "News Corp.", + "length": 10 + }, + { + "text": "The News Corp.", + "length": 14 + }, + { + "text": "and police said Saturday.", + "length": 25 + }, + { + "text": "subsidiary that publishes the Sun and other U.", + "length": 46 + }, + { + "text": 
"newspapers, London's Metropolitan Police Service said.", + "length": 54 + }, + { + "text": "He works for the force's Territorial Policing command.", + "length": 54 + }, + { + "text": "A fourth, aged 42, was arrested at an east London police station.", + "length": 65 + }, + { + "text": "Representatives of a range of news outlets have appeared before it.", + "length": 67 + }, + { + "text": "Three people have been arrested in connection with both investigations.", + "length": 71 + }, + { + "text": "said it is cooperating with the search of its News International offices.", + "length": 73 + }, + { + "text": "By late Saturday, police said that all five men were subsequently released after posting bail.", + "length": 94 + }, + { + "text": "A spokeswoman for News International earlier declined to comment on the search of its offices.", + "length": 94 + }, + { + "text": "A public inquiry has also been set up to look at claims of widespread misconduct by the British media.", + "length": 102 + }, + { + "text": "Police searched the men's homes as well as the East London offices of News International, the News Corp.", + "length": 104 + }, + { + "text": "Operation Elveden is overseen by the British police watchdog, the Independent Police Complaints Commission.", + "length": 107 + }, + { + "text": "Three of the men were arrested at their homes -- two of them, aged 49 and 57, in the county of Essex, and one aged 48 in London.", + "length": 128 + }, + { + "text": "As a result of that review, which is ongoing, the MSC provided information to the Elveden investigation which led to today's arrests.", + "length": 133 + }, + { + "text": "The current and past newspaper employees were all set to \"return pending further inquiries\" in April or May, according to the Scotland Yard statement.", + "length": 150 + }, + { + "text": "statement said the company had \"made a commitment last summer that unacceptable news gathering practices by individuals in the past would not be repeated.", + 
"length": 154 + }, + { + "text": "\" The committee was asked \"to proactively co-operate with law enforcement and other authorities if potentially relevant information arose at those titles.", + "length": 154 + }, + { + "text": "\" The Sun, which is Britain's best-selling tabloid newspaper, was the sister paper of News International's now-defunct Sunday title, the News of the World.", + "length": 155 + }, + { + "text": "However, for the purpose of reaching these settlements only, NGN agreed that the damages to be paid to claimants should be assessed as if this was the case.", + "length": 156 + }, + { + "text": "Earlier, they'd been questioned on suspicion of corruption, aiding and abetting misconduct in a public office, and conspiracy in relation to those offenses.", + "length": 156 + }, + { + "text": "\" Both James and Rupert Murdoch, as well as senior executives at News International, have testified before British lawmakers examining allegations of wrongdoing.", + "length": 161 + }, + { + "text": "James Murdoch, chief executive of News International and the son of media mogul Rupert Murdoch, has insisted that the practice of phone hacking was not widespread.", + "length": 163 + }, + { + "text": "There have been 13 arrests in connection with Operation Elveden and 17 in relation to Operation Weeting, the phone hacking inquiry, the Metropolitan Police confirmed.", + "length": 166 + }, + { + "text": "London (CNN) -- Four current and former employees of Britain's Sun newspaper were arrested by authorities investigating claims of inappropriate payments to police, News Corp.", + "length": 174 + }, + { + "text": "News International said the company \"made no admission as part of these settlements that directors or senior employees knew about the wrongdoing by NGN or sought to conceal it.", + "length": 176 + }, + { + "text": "The best-selling News of the World tabloid was shuttered in July amid outrage over claims that its staff hacked the voicemail of a missing 
13-year-old girl who turned out to have been murdered.", + "length": 193 + }, + { + "text": "The investigation into alleged corruption, known as Operation Elveden, is being run in conjunction with an inquiry into phone hacking prompted by allegations of wrongdoing at News of the World.", + "length": 193 + }, + { + "text": "Among those who read statements in court were Labour Party lawmaker Chris Bryant, former Deputy Prime Minister John Prescott, actor Jude Law, the actor's ex-wife Sadie Frost, and high-profile rugby player Gavin Henson.", + "length": 218 + }, + { + "text": "A 29-year-old police officer was also arrested Saturday at the central London police station where he works, police said, on suspicion of corruption, misconduct in a public office and conspiracy in relation to both offenses.", + "length": 224 + }, + { + "text": "Police said the operation was the result of information provided to police by News Corporation's Management and Standards Committee (MSC), which was set up to look into conduct at News International, a subsidiary of News Corp.", + "length": 226 + }, + { + "text": "News Group Newspapers, a subsidiary of News International that was the publisher of News of the World, agreed to payouts in the High Court totaling hundreds of thousands of dollars earlier this month over phone hacking claims.", + "length": 226 + }, + { + "text": "The operation \"relates to suspected payments to police officers and is not about seeking journalists to reveal confidential sources in relation to information that has been obtained legitimately,\" an earlier police statement said.", + "length": 230 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5861343741416931 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:21.902782405Z", + "first_section_created": "2025-12-23T09:34:21.90439437Z", + "last_section_published": "2025-12-23T09:34:21.904600278Z", + "all_results_received": "2025-12-23T09:34:21.963583255Z", 
+ "output_generated": "2025-12-23T09:34:21.963764863Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:21.90439437Z", + "publish_time": "2025-12-23T09:34:21.904600278Z", + "first_worker_start": "2025-12-23T09:34:21.905110099Z", + "last_worker_end": "2025-12-23T09:34:21.96262Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:21.905098098Z", + "start_time": "2025-12-23T09:34:21.905168501Z", + "end_time": "2025-12-23T09:34:21.905270605Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:21.90526Z", + "start_time": "2025-12-23T09:34:21.905397Z", + "end_time": "2025-12-23T09:34:21.96262Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:21.905042196Z", + "start_time": "2025-12-23T09:34:21.905110199Z", + "end_time": "2025-12-23T09:34:21.905200002Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:21.905040296Z", + "start_time": "2025-12-23T09:34:21.905110099Z", + "end_time": "2025-12-23T09:34:21.9051426Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + 
"median_processing_ms": 57, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4549, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/004137029bea1cf802283d003dd865755190fb57.json b/data/output/004137029bea1cf802283d003dd865755190fb57.json new file mode 100644 index 0000000..2841a26 --- /dev/null +++ b/data/output/004137029bea1cf802283d003dd865755190fb57.json @@ -0,0 +1,472 @@ +{ + "file_name": "004137029bea1cf802283d003dd865755190fb57.txt", + "total_words": 972, + "top_n_words": [ + { + "word": "the", + "count": 51 + }, + { + "word": "to", + "count": 27 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "fire", + "count": 22 + }, + { + "word": "a", + "count": 21 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "for", + "count": 15 + }, + { + "word": "we", + "count": 15 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "s", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "all!", + "length": 4 + }, + { + "text": "duty.", + "length": 5 + }, + { + "text": "' By .", + "length": 6 + }, + { + "text": "cards.", + "length": 6 + }, + { + "text": "shots.", + "length": 6 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Until now.", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Smokin' hot!", + "length": 12 + }, + { + "text": "Martha De 
Lacey .", + "length": 17 + }, + { + "text": "Fighters Charity .", + "length": 18 + }, + { + "text": "for the 2014 edition.", + "length": 21 + }, + { + "text": "other no-hold-barred poses.", + "length": 27 + }, + { + "text": "09:00 EST, 13 November 2012 .", + "length": 29 + }, + { + "text": "12:57 EST, 13 November 2012 .", + "length": 29 + }, + { + "text": "maintain our dignity as well!", + "length": 29 + }, + { + "text": "fires or checking smoke alarms.", + "length": 31 + }, + { + "text": "money as we could but also to .", + "length": 31 + }, + { + "text": "Barnsley, Rotherham and Sheffield.", + "length": 34 + }, + { + "text": "But we all had a really good time.", + "length": 34 + }, + { + "text": "Despite obtaining permission to film on .", + "length": 41 + }, + { + "text": "'We desperately wanted to raise as much .", + "length": 41 + }, + { + "text": "'We hope it will be the first of a series.", + "length": 42 + }, + { + "text": "All of us were a bit embarrassed about posing.", + "length": 46 + }, + { + "text": "But once we relaxed the photographer made us feel at ease.", + "length": 58 + }, + { + "text": "It was good fun and we've all become great friends by doing it.", + "length": 63 + }, + { + "text": "But the the calendar has caused such a buzz there should be no .", + "length": 64 + }, + { + "text": "in fires - and the Fire Fighters' Charity does so much for them.", + "length": 64 + }, + { + "text": "with the permission of senior officers and involved ladies from .", + "length": 65 + }, + { + "text": "fire service premises, there was no question of any pole dancing or .", + "length": 69 + }, + { + "text": "To avoid bringing the fire service into disrepute the women limited .", + "length": 69 + }, + { + "text": "Fire girl Fleur Doyle, 37, was among the first to answer the call of .", + "length": 70 + }, + { + "text": "up their coats to show what they wear underneath - when not fighting .", + "length": 70 + }, + { + "text": "shortage of female fire-fighting 
buddies willing to strut their stuff .", + "length": 71 + }, + { + "text": "The money will go to Cancer Research UK and the Fire Fighters' Charity.", + "length": 71 + }, + { + "text": "And we have got colleagues who have been off sick that have been burned .", + "length": 73 + }, + { + "text": "maintain our dignity as well - because we have to come to work after it .", + "length": 73 + }, + { + "text": "girls are confident of raising £5,000 are a further print run is on the .", + "length": 74 + }, + { + "text": "themselves to stripping off their tunics to reveal their bras or opening .", + "length": 74 + }, + { + "text": "The pictures for the calendar were taken at a fire station in Rotherham, .", + "length": 74 + }, + { + "text": "' The first published 1,000 calendars are selling so well at £5 each the .", + "length": 75 + }, + { + "text": "' With only nine girls and 12 months to fill three of the pages are group .", + "length": 75 + }, + { + "text": "'We are all fire fighters but we are also all normal people doing a normal job.", + "length": 79 + }, + { + "text": "But the fire men made us full of confidence and we had a lot of help form the guys.", + "length": 83 + }, + { + "text": "Left: May's fighter Jade Styan, 26, White Watch; Right: The cover of Fire Girls 2013 .", + "length": 86 + }, + { + "text": "But it's never been done by the brave fire-fighting women of Britain's emergency services.", + "length": 90 + }, + { + "text": "She blushed: 'At the photo shoot it was a bit daunting to whip our jackets off and expose ourselves in our bras.", + "length": 112 + }, + { + "text": "'We were all a bit nervous at first, but ended up having a really good time and it has brought us closer together.", + "length": 114 + }, + { + "text": "'All of us have friends or family that have been struck by cancer and Cancer Research UK is such a brilliant cause.", + "length": 115 + }, + { + "text": "' Left: March's Crew Manager Sarah McCarthy, 32, Blue Watch; Right: June's Crew 
Manager Sally Stark, 54, Blue Watch .", + "length": 117 + }, + { + "text": "Aged 23 to 54, the women admitted they were nervous at first but now hope the calender will be the first in a series.", + "length": 117 + }, + { + "text": "Left: September's firefighter Sian Nortcliffe, 36, Green Watch; Right: July's firefighter Fleur Doyle, 37, Red Watch .", + "length": 118 + }, + { + "text": "Left: November's firefighter Claire Morton, 36, Blue Watch; Right: February's firefighter Nicole Hallat, 24, Red Watch .", + "length": 120 + }, + { + "text": "Miss August - Angie Thorp, 40, a South Yorkshire fire service crew manager - said: 'None of us had ever modelled before.", + "length": 120 + }, + { + "text": "Miss Thorp added: 'So many girls are interested in taking part next year it is a shame there are not 24 months in a year.", + "length": 121 + }, + { + "text": "Female fire-fighters strip off for Britain's first ever all-women charity calender to raise money for Cancer Research UK .", + "length": 122 + }, + { + "text": "All of us did the race for life twice in full kit with breathing apparatus and decided to do the calendar as the next step.", + "length": 123 + }, + { + "text": "Left: April's firefighter Frankie Salerno, 27, Blue Watch; Right: August's Crew Manager Angie Thorp, 40, from White Watch .", + "length": 123 + }, + { + "text": "We are planning to do a double sided calendar next year with the men on one side and the girls on the other - and Christmas cards.", + "length": 130 + }, + { + "text": "Firemen stripping down to their flame-retardant trousers for charity calendars is an end-of-the-year tradition all over the country.", + "length": 132 + }, + { + "text": "October: From left, Fleur Doyle, Jade Styan, Nicole Hallat, Claire Morton, Frankie Salerno, Sally Stark, Sian Nortcliffe and Angie Thrope .", + "length": 139 + }, + { + "text": "'Our husbands were also very supportive and many of those buying the calendar are male colleagues' wives who want it as a 
stocking filler for their husbands.", + "length": 157 + }, + { + "text": "To those who might consider this way of raising money for charity undignified, the women said: 'We desperately wanted to raise as much money as we could but also to .", + "length": 166 + }, + { + "text": "' December: The girls pose in their uniforms alongside Father Christmas and his giant hose, in a calender shoot to raise money for Cancer Research UK and the Fire Fighters Charity .", + "length": 181 + }, + { + "text": "The women from Barnsley, Rotherham and Sheffield emergency services have stripped down to their underwear for the first UK female-only fire fighter calendar, Fire Girls 2013, which is being sold on eBay for £6.", + "length": 212 + }, + { + "text": "The ladies of the South Yorkshire fire brigade have been even braver than usual, stripping off to take part in a scantily clad photo shoot for a calender to raise money for Cancer Research UK and the Fire Fighters Charity.", + "length": 222 + }, + { + "text": "In the smoking hot photos the women - Fleur Doyle, Jade Styan, Nicole Hallat, Claire Morton, Frankie Salerno, Sally Stark, Sian Nortcliffe and Angie Thrope - are seen posing in their bras holding hoses, leaning across laders, fooling around in the fire foam, hanging out with Father Christmas, wearing hard hats, drinking water and lounging across cars and beside the fire truck.", + "length": 379 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.6140580773353577 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:22.40415886Z", + "first_section_created": "2025-12-23T09:34:22.406103338Z", + "last_section_published": "2025-12-23T09:34:22.406505554Z", + "all_results_received": "2025-12-23T09:34:22.515701355Z", + "output_generated": "2025-12-23T09:34:22.515956465Z", + "total_processing_time_ms": 111, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 109, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { 
+ "section_id": 0, + "creation_time": "2025-12-23T09:34:22.406103338Z", + "publish_time": "2025-12-23T09:34:22.406373349Z", + "first_worker_start": "2025-12-23T09:34:22.406803966Z", + "last_worker_end": "2025-12-23T09:34:22.497025Z", + "total_journey_time_ms": 90, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:22.406813466Z", + "start_time": "2025-12-23T09:34:22.406875969Z", + "end_time": "2025-12-23T09:34:22.406973873Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:22.407079Z", + "start_time": "2025-12-23T09:34:22.407234Z", + "end_time": "2025-12-23T09:34:22.497025Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 89 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:22.406810966Z", + "start_time": "2025-12-23T09:34:22.40690057Z", + "end_time": "2025-12-23T09:34:22.407015275Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:22.406727963Z", + "start_time": "2025-12-23T09:34:22.406803966Z", + "end_time": "2025-12-23T09:34:22.406861668Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:22.40641355Z", + "publish_time": "2025-12-23T09:34:22.406505554Z", + "first_worker_start": "2025-12-23T09:34:22.406855968Z", + "last_worker_end": "2025-12-23T09:34:22.514809Z", + "total_journey_time_ms": 108, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:22.406933771Z", + "start_time": "2025-12-23T09:34:22.406959372Z", + "end_time": "2025-12-23T09:34:22.406971073Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:22.407389Z", + "start_time": "2025-12-23T09:34:22.407525Z", + 
"end_time": "2025-12-23T09:34:22.514809Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 107 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:22.40691017Z", + "start_time": "2025-12-23T09:34:22.406942072Z", + "end_time": "2025-12-23T09:34:22.406964373Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:22.406825367Z", + "start_time": "2025-12-23T09:34:22.406855968Z", + "end_time": "2025-12-23T09:34:22.406863268Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 196, + "min_processing_ms": 89, + "max_processing_ms": 107, + "avg_processing_ms": 98, + "median_processing_ms": 107, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2740, + "slowest_section_id": 1, + "slowest_section_time_ms": 108 + } +} diff --git a/data/output/00413c507d28a0a8fae46f6ceb0dd43fd8da91be.json b/data/output/00413c507d28a0a8fae46f6ceb0dd43fd8da91be.json new file mode 100644 index 0000000..4bf9390 --- 
/dev/null +++ b/data/output/00413c507d28a0a8fae46f6ceb0dd43fd8da91be.json @@ -0,0 +1,274 @@ +{ + "file_name": "00413c507d28a0a8fae46f6ceb0dd43fd8da91be.txt", + "total_words": 588, + "top_n_words": [ + { + "word": "the", + "count": 28 + }, + { + "word": "to", + "count": 26 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "peters", + "count": 12 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "has", + "count": 10 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "i", + "count": 8 + }, + { + "word": "been", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "I loved my job.", + "length": 15 + }, + { + "text": "'She’s just not same...", + "length": 25 + }, + { + "text": "'I want to go back to work ...", + "length": 30 + }, + { + "text": "'I had to turn down a really good opportunity ...", + "length": 49 + }, + { + "text": "'I won’t have the operation here after everything, would you?", + "length": 63 + }, + { + "text": "it would have been a big break for us, I’ve lost a lot of money.", + "length": 66 + }, + { + "text": "' The incident has forced the couple to have to rely on Centrelink.", + "length": 67 + }, + { + "text": "Mrs Peters has been left with memory loss and hindered motor skills.", + "length": 68 + }, + { + "text": "Mrs Peters has been left with memory loss and hindered motor skills .", + "length": 69 + }, + { + "text": "Mr Peters has been left devastated and uncertain with what the future holds.", + "length": 76 + }, + { + "text": "' Despite everything, Mrs Peters still has the small tumour on her adrenal gland.", + "length": 82 + }, + { + "text": "' Mrs Peters also fears how she will get by in life without 'any income coming in'.", + "length": 83 + }, + { + "text": "'It's about the worst thing that could happen to you at this stage in life,' she said.", + "length": 86 + }, + { + "text": "The hospital has offered to fly her to any hospital in Australia to have the operation.", + 
"length": 87 + }, + { + "text": "'Royal Darwin Hospital is a fully accredited facility that practices under the national standards.", + "length": 98 + }, + { + "text": "A 63-year-old woman has been left brain damaged after undergoing routine surgery to remove a small tumour.", + "length": 106 + }, + { + "text": "'I’ve had to apply for a disability pension for Michelle and a carer’s pension for me,' Mr Peters said.", + "length": 107 + }, + { + "text": "They can’t tell us how long it’s going to take to get better or if it is going to get better,' he said.", + "length": 107 + }, + { + "text": "Four-years ago the 63-year-old widow was given a chance at new life, marrying subcontractor Garry Peters.", + "length": 107 + }, + { + "text": "Now a full-time carer to his wife, Mr Peters has had to turn down jobs to look after his brain damaged wife.", + "length": 108 + }, + { + "text": "'They're going to take my licence away from me and I won't be able to go back to work because I'm a chef,' she said.", + "length": 116 + }, + { + "text": "A 63-year-old woman has been left brain damaged after undergoing routine surgery to remove a small tumour on her adrenal glands .", + "length": 129 + }, + { + "text": "'The Division of Surgery has been in discussion with the family and is sensitive to the distress they are experiencing,' Ms Sykes said.", + "length": 135 + }, + { + "text": "Sharon Sykes, the acting chief operating officer of Top End Health Service, said senior management at the hospital were 'aware of this case'.", + "length": 141 + }, + { + "text": "The incident forced doctors to place Mrs Peters in an induced coma for two days, spending the next five days in intensive care at the Royal Darwin Hospital.", + "length": 156 + }, + { + "text": "The incident forced doctors to place Mrs Peters in an induced coma for two days, spending the next five days in intensive care at the Royal Darwin Hospital .", + "length": 157 + }, + { + "text": "'You put a lot of trust in them and you 
think they’re going to do the right thing and when they don’t you do feel betrayed,' Mrs Peters told the NT News.", + "length": 157 + }, + { + "text": "Michelle Peters from the Northern Territory went in for an operation last month to remove a tumour on her adrenal gland when a surgical mishap saw carbon dioxide being pumped through the her liver and into her bloodstream.", + "length": 222 + }, + { + "text": "Should an adverse event occur within any of our hospitals we observe national incident management processes which include undertaking a full investigation; observing National Open Disclosure processes by discussing the event openly with patients, families and carers; and ensuring any remedial systems improvements and leanings are implemented as part of our safety and quality focus.", + "length": 384 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5992882251739502 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:22.906856219Z", + "first_section_created": "2025-12-23T09:34:22.907193733Z", + "last_section_published": "2025-12-23T09:34:22.907434542Z", + "all_results_received": "2025-12-23T09:34:22.971768335Z", + "output_generated": "2025-12-23T09:34:22.971929842Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:22.907193733Z", + "publish_time": "2025-12-23T09:34:22.907434542Z", + "first_worker_start": "2025-12-23T09:34:22.907902561Z", + "last_worker_end": "2025-12-23T09:34:22.970829Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:22.90786306Z", + "start_time": "2025-12-23T09:34:22.907924262Z", + "end_time": "2025-12-23T09:34:22.907990465Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": 
"sentiment", + "receive_time": "2025-12-23T09:34:22.908079Z", + "start_time": "2025-12-23T09:34:22.908225Z", + "end_time": "2025-12-23T09:34:22.970829Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:22.90786856Z", + "start_time": "2025-12-23T09:34:22.907941863Z", + "end_time": "2025-12-23T09:34:22.908053967Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:22.907828258Z", + "start_time": "2025-12-23T09:34:22.907902561Z", + "end_time": "2025-12-23T09:34:22.907933863Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3263, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/004184716e05612159481f611e9b6d68271433bc.json 
b/data/output/004184716e05612159481f611e9b6d68271433bc.json new file mode 100644 index 0000000..73ff6a0 --- /dev/null +++ b/data/output/004184716e05612159481f611e9b6d68271433bc.json @@ -0,0 +1,488 @@ +{ + "file_name": "004184716e05612159481f611e9b6d68271433bc.txt", + "total_words": 1249, + "top_n_words": [ + { + "word": "the", + "count": 66 + }, + { + "word": "to", + "count": 31 + }, + { + "word": "a", + "count": 30 + }, + { + "word": "and", + "count": 28 + }, + { + "word": "saatchi", + "count": 26 + }, + { + "word": "in", + "count": 24 + }, + { + "word": "of", + "count": 24 + }, + { + "word": "her", + "count": 21 + }, + { + "word": "was", + "count": 18 + }, + { + "word": "he", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "intact.", + "length": 7 + }, + { + "text": "Friday.", + "length": 7 + }, + { + "text": "Earlier .", + "length": 9 + }, + { + "text": "‘They .", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "settlements.", + "length": 12 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Distress: Charles .", + "length": 19 + }, + { + "text": "Fiona Shackleton is .", + "length": 21 + }, + { + "text": "02:59 EST, 31 July 2013 .", + "length": 25 + }, + { + "text": "03:11 EST, 1 August 2013 .", + "length": 26 + }, + { + "text": "Sara Nathan and Tara Brady .", + "length": 28 + }, + { + "text": "via Fiona Shackleton – to a speedy divorce.", + "length": 45 + }, + { + "text": "His collection is reported to be worth £200million.", + "length": 52 + }, + { + "text": "Mr Saatchi later accepted a police caution for assault.", + "length": 55 + }, + { + "text": "Fiona lodged the divorce document with the court last .", + "length": 55 + }, + { + "text": "Nigella’s first cousin and Charles highly respects her.", + "length": 57 + }, + { + "text": "It was later installed in a dedicated room in his own home.", + "length": 59 + }, + { 
+ "text": "although her friends said she had hoped to repair their marriage .", + "length": 66 + }, + { + "text": "Saatchi announced he was divorcing Nigella in a lengthy statement - .", + "length": 69 + }, + { + "text": "The document was signed by the popular cook and broadcaster on July 9.", + "length": 70 + }, + { + "text": "Neither the couple or their legal representatives attended the hearing.", + "length": 71 + }, + { + "text": "It is thought the divorce will be rubber-stamped some time in September.", + "length": 72 + }, + { + "text": "have agreed never to make public the terms of the divorce or financial .", + "length": 72 + }, + { + "text": "with lawyers to deal directly with Nigella's high profile lawyer Fiona .", + "length": 72 + }, + { + "text": "Shackleton in an attempt to keep his multimillion pound art collection .", + "length": 72 + }, + { + "text": "' When there was no reply she said: 'I grant the pronouncement of decrees.", + "length": 74 + }, + { + "text": "He also alleged that she had held him by the throat during arguments at home.", + "length": 77 + }, + { + "text": "this month, friends of Mr Saatchi told the Mail:‘He and Nigella agreed – .", + "length": 78 + }, + { + "text": "Nigella moved out of their home and has rented an apartment with her son Bruno.", + "length": 79 + }, + { + "text": "She also answered ‘yes’ when asked if Mr Saatchi’s behaviour had continued.", + "length": 81 + }, + { + "text": "They are also understood to have signed a pre-nuptial agreement when they wed in 2003.", + "length": 86 + }, + { + "text": "' A decree nisi is a statement saying the court sees no reason why a divorce cannot be granted.", + "length": 95 + }, + { + "text": "Neither the 53-year-old television chef nor her art collector husband, 70, attended the hearing.", + "length": 96 + }, + { + "text": "Nigella Lawson was seen at Heathrow airport last night before the decree nisi was granted today .", + "length": 97 + }, + { + "text": "However papers lodged at 
the High Court revealed it was Miss Lawson who petitioned for the divorce.", + "length": 99 + }, + { + "text": "In Court 9, Lawson and Saatchi's names were 12th on a list read out among 14 other divorcing couples.", + "length": 101 + }, + { + "text": "At one point he even put his finger up her nose as they dined outside Scott’s restaurant in London.", + "length": 101 + }, + { + "text": "Lawson, who was the applicant in the case, also confirmed in the document that they were living apart.", + "length": 102 + }, + { + "text": "She is soon planning to head to Los Angeles where she is filming the new season of her ABC show The Taste.", + "length": 106 + }, + { + "text": "He has traded art over the years, making large profits on works by Damien Hirst and other British artists.", + "length": 106 + }, + { + "text": "Lawson flew into Heathrow Airport last night ahead of the documents being presented at the High Court today.", + "length": 108 + }, + { + "text": "The judge asked: 'Does any party or person wish to show cause against decrees being made or in the question of costs?", + "length": 117 + }, + { + "text": "‘He did not hire a lawyer himself and he has not even spoken to Helen Ward, whom a Sunday paper claimed he has hired.", + "length": 119 + }, + { + "text": "The pair now have to wait for a decree absolute, which is usually issued six weeks and a day later, ending their marriage.", + "length": 122 + }, + { + "text": "Mr Saatchi – who initially described the incident as a ‘playful tiff’ – has now accepted a police caution for assault.", + "length": 126 + }, + { + "text": "‘Fiona prepared a full court document, every detail of every asset was listed, and an agreement was reached with no dispute.", + "length": 126 + }, + { + "text": "Distraught: Nigella Lawson has now moved out of her marital home and into a rented apartment with her children Cosima and Bruno .", + "length": 129 + }, + { + "text": "The Saatchi Gallery, which Mr Saatchi opened in 1985, has included 
some of the UK’s most controversial and talked-about artworks.", + "length": 131 + }, + { + "text": "Nigella was then left reeling by Mr Saatchi’s ‘cruelty’ after he announced in The Mail on Sunday that he would be seeking a divorce.", + "length": 138 + }, + { + "text": "Mr Saatchi appeared to blame Nigella for the end of the marriage, claiming he decided to divorce her after she refused to defend him in public.", + "length": 143 + }, + { + "text": "Proceedings: After the decree nisi is announced today Nigella and soon-to-be-ex-husband Charles Saatchi will be formally divorced in six weeks .", + "length": 144 + }, + { + "text": "The end of a marriage: The photo of Charles Saatchi choking his wife Nigella Lawson led to their divorce, to be heard at the High Court tomorrow .", + "length": 146 + }, + { + "text": "Mr Saatchi said in his statement he had ‘clearly been a disappointment’ to his wife over the past year, and that they had been drifting apart.", + "length": 146 + }, + { + "text": "Both children have been seen comforting their mother during the past few tense weeks at her new rented home and will join her in Los Angeles this summer.", + "length": 153 + }, + { + "text": "'Heartbroken': Although Charles Saatchi announced he was divorcing Nigella Lawson - it was the TV star who was in the fact the applicant for the divorce .", + "length": 154 + }, + { + "text": "Mr Saatchi also famously bought Tracey Emin’s work My Bed for £150,000, for which he got an unmade bed littered with condoms, cigarette packets and underwear.", + "length": 161 + }, + { + "text": "They were reportedly arguing over Miss Lawson’s son Bruno, 17, and daughter Cosima, 19, from her marriage to journalist John Diamond, who died in 2001 from throat cancer.", + "length": 172 + }, + { + "text": "In a final, crushing act of emotional control, he released an extraordinary statement to say that the marriage was over — without giving her any warning of his intentions.", + "length": 173 + }, + { + 
"text": "Nigella Lawson yesterday ended her ten-year marriage to Charles Saatchi on the grounds of his ‘unreasonable behaviour’ – in a hearing which lasted for just 70 seconds.", + "length": 173 + }, + { + "text": "Court paperwork shows that Nigella Lawson applied to divorce Charles Saatchi on the grounds of his 'continuing unreasonable behaviour' It is understood Mr Saatchi dispensed .", + "length": 174 + }, + { + "text": "The tycoon's millions were amassed from the advertising agency Saatchi \u0026 Saatchi, which he founded with his brother Maurice, now Lord Saatchi, and various astute business deals.", + "length": 177 + }, + { + "text": "’ Baroness Shackleton, who advised the Prince of Wales in his divorce and represented Sir Paul McCartney, has been nicknamed the Steel Magnolia for her charm and determination.", + "length": 178 + }, + { + "text": "The couple's marriage ended after Mr Saatchi was pictured choking his wife, who has made her fortune as TV's Domestic Goddess, at their favourite London haunt, Scott's restaurant, last month.", + "length": 191 + }, + { + "text": "Friends say Nigella was willing right up until the end to give their marriage another chance and was left ‘floored’ and ‘blindsided’ by his decision to begin divorce proceedings against her.", + "length": 198 + }, + { + "text": "The pair, who are said to be worth an estimated £150million and shared a £12million home in Chelsea, are believed to have agreed never to publicly disclose the terms of the divorce or financial settlements.", + "length": 208 + }, + { + "text": "The current exhibition includes work from Jessica Jackson Hutchins, who makes papier-maché sculptures of household objects, and a piece by Jose Lerma and Hector Madera of a giant bust of a man made from paper.", + "length": 210 + }, + { + "text": "The couple were granted a decree nisi at the High Court in London on the grounds that the marriage had ‘irretrievably broken down’, less than eight weeks after they had a heated row 
which saw Mr Saatchi grabbing his wife’s throat.", + "length": 236 + }, + { + "text": "The former advertising tycoon initially brushed off the incident as nothing more than a ‘playful tiff’, but further images revealed Miss Lawson suffered 27 minutes of anguish and had her nose repeatedly tweaked and twisted by Mr Saatchi.", + "length": 241 + }, + { + "text": "Nigella is also said by friends to have invited Phoebe Saatchi, 19, Mr Saatchi's daughter from his second marriage to Kay Hartenstein, to stay at her rented home, although it is unclear as of yet whether she take her step-mother up on her offer.", + "length": 245 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5334375202655792 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:23.408225025Z", + "first_section_created": "2025-12-23T09:34:23.408534238Z", + "last_section_published": "2025-12-23T09:34:23.409017557Z", + "all_results_received": "2025-12-23T09:34:23.526529793Z", + "output_generated": "2025-12-23T09:34:23.526775703Z", + "total_processing_time_ms": 118, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 117, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:23.408534238Z", + "publish_time": "2025-12-23T09:34:23.408825749Z", + "first_worker_start": "2025-12-23T09:34:23.409280568Z", + "last_worker_end": "2025-12-23T09:34:23.482485Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:23.409300068Z", + "start_time": "2025-12-23T09:34:23.409386872Z", + "end_time": "2025-12-23T09:34:23.40957878Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:23.409466Z", + "start_time": "2025-12-23T09:34:23.409612Z", + "end_time": "2025-12-23T09:34:23.482485Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:23.409255167Z", + "start_time": "2025-12-23T09:34:23.409323969Z", + "end_time": "2025-12-23T09:34:23.409438574Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:23.409203765Z", + "start_time": "2025-12-23T09:34:23.409280568Z", + "end_time": "2025-12-23T09:34:23.409318369Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:23.408881852Z", + "publish_time": "2025-12-23T09:34:23.409017557Z", + "first_worker_start": "2025-12-23T09:34:23.409392772Z", + "last_worker_end": "2025-12-23T09:34:23.525658Z", + "total_journey_time_ms": 116, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:23.409505377Z", + "start_time": "2025-12-23T09:34:23.409549978Z", + "end_time": "2025-12-23T09:34:23.40959718Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:23.409694Z", + "start_time": "2025-12-23T09:34:23.409825Z", + "end_time": "2025-12-23T09:34:23.525658Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 115 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:23.409448474Z", + "start_time": "2025-12-23T09:34:23.409495276Z", + "end_time": "2025-12-23T09:34:23.409561479Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:23.40932737Z", + "start_time": "2025-12-23T09:34:23.409392772Z", + "end_time": "2025-12-23T09:34:23.409439074Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, 
+ "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 187, + "min_processing_ms": 72, + "max_processing_ms": 115, + "avg_processing_ms": 93, + "median_processing_ms": 115, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3687, + "slowest_section_id": 1, + "slowest_section_time_ms": 116 + } +} diff --git a/data/output/004193de0cd227b2587bb7f5c09bf28078c40d9d.json b/data/output/004193de0cd227b2587bb7f5c09bf28078c40d9d.json new file mode 100644 index 0000000..eecbcd6 --- /dev/null +++ b/data/output/004193de0cd227b2587bb7f5c09bf28078c40d9d.json @@ -0,0 +1,488 @@ +{ + "file_name": "004193de0cd227b2587bb7f5c09bf28078c40d9d.txt", + "total_words": 994, + "top_n_words": [ + { + "word": "the", + "count": 43 + }, + { + "word": "to", + "count": 39 + }, + { + "word": "and", + "count": 25 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "of", + "count": 17 + }, + { + "word": "security", + "count": 17 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "footage", + "count": 13 + }, + { + "word": "marketing", + "count": 10 + }, + { + "word": "for", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "He .", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + 
"text": "'Now .", + "length": 6 + }, + { + "text": "floor.", + "length": 6 + }, + { + "text": "' Time.", + "length": 7 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "difference.", + "length": 11 + }, + { + "text": "Big brother?", + "length": 12 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Brown Shoe Co.", + "length": 14 + }, + { + "text": "She said: 'Our .", + "length": 16 + }, + { + "text": "smartphone monitor.", + "length": 19 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "closer to the counter.", + "length": 22 + }, + { + "text": "Prism Skylabs CEO and .", + "length": 23 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Little changes make a big .", + "length": 27 + }, + { + "text": "15:11 EST, 24 October 2012 .", + "length": 28 + }, + { + "text": "16:42 EST, 24 October 2012 .", + "length": 28 + }, + { + "text": "put the yellow sweaters here?", + "length": 29 + }, + { + "text": "'This information helps managers .", + "length": 34 + }, + { + "text": "4 billion within the next five years.", + "length": 37 + }, + { + "text": "the seating area where they try on shoes.", + "length": 41 + }, + { + "text": "' He added that recently, while keeping .", + "length": 41 + }, + { + "text": "It lets me read the crowd no matter where I am.", + "length": 47 + }, + { + "text": "put out the yellow sweaters or the blue sweaters?", + "length": 49 + }, + { + "text": "'Plus the line moved noticeably faster,' he added.", + "length": 50 + }, + { + "text": "'The camera saw what I didn’t,' Mr Bradley said.", + "length": 50 + }, + { + "text": "make decisions such as, \"will it increase sales if I .", + "length": 54 + }, + { + "text": "changes, including [improved] traffic counting, have .", + "length": 54 + }, + { + "text": "stores they were like wallpaper and became meaningless.", + "length": 55 + }, + { + "text": "collectively impacted our sales, theft and labor costs.", + 
"length": 55 + }, + { + "text": "green or blue marker showing the most popular merchandise.", + "length": 58 + }, + { + "text": "Now, digital innovations like cloud technology and have given .", + "length": 63 + }, + { + "text": "He explained: 'The mobile video lets me stand above the action.", + "length": 63 + }, + { + "text": "cut off other customers, to pick up a cup of fresh orange juice.", + "length": 64 + }, + { + "text": "said his busy staff didn’t notice, yet it was so clear on his .", + "length": 65 + }, + { + "text": "American Apparel also saved 40per cent in fixed expenses by using .", + "length": 67 + }, + { + "text": "Jon Grander, vice president of asset and revenue management at the .", + "length": 68 + }, + { + "text": "footage to interpret consumer patterns that go unnoticed on the shop .", + "length": 70 + }, + { + "text": "an eye on the store's cameras during breakfast rush hour, he noticed .", + "length": 70 + }, + { + "text": "'Sales of the fresh OJ shot up 100 percent,' after he moved the juice .", + "length": 71 + }, + { + "text": "retailers cheaper, faster and easier ways to use their store's security .", + "length": 73 + }, + { + "text": "that people would often attempt to lean across the counter display, and .", + "length": 73 + }, + { + "text": ", which owns Famous Footwear, said: 'We were posting so many ads in our .", + "length": 73 + }, + { + "text": "we can put the ads where we have people’s undivided attention, such as .", + "length": 74 + }, + { + "text": "make micro-merchandising decisions such as, \"will it increase sales if I .", + "length": 74 + }, + { + "text": "co-founder, Steve Russell, said: 'This kind of information helps managers .", + "length": 75 + }, + { + "text": "A large 40per cent of people said they would avoid retailers that did so, if they were made aware.", + "length": 98 + }, + { + "text": "It’s especially important in the case of children and in sensitive areas of stores such as pharmacies.", + "length": 104 + 
}, + { + "text": "The Famous Footwear chain also uses security footage to help managers place promotions in the optimal spot.", + "length": 107 + }, + { + "text": "While not as sophisticated, surveillance cameras have actually been used in marketing for a number of years.", + "length": 108 + }, + { + "text": "\"' Owner Ivor Bradley said he uses his smartphone to watch streaming footage via the The Creamery's security cameras.", + "length": 117 + }, + { + "text": "\"' The Creamery, a San Francisco cafe, knows the benefits of fusing security footage with marketing analytics firsthand.", + "length": 120 + }, + { + "text": "Reaping rewards: The Creamery and Famous Footwear knows the benefits of fusing security footage with marketing analytics firsthand .", + "length": 132 + }, + { + "text": "RetailNext, a system that aggregates surveillance data for marketing purposes, according to Stacey Shulman, chief information officer.", + "length": 134 + }, + { + "text": "Ms Dixon added: 'It is crucial for retailers to disclose that camera-tracking information is being used for marketing and to offer shoppers a way to opt out.", + "length": 157 + }, + { + "text": "com polled more than 500 readers in September on their feelings towards these crossover camera practices, and 62per cent admitted they thought it was 'creepy'.", + "length": 159 + }, + { + "text": "' According to research and surveillance specialists, Walmart has employed an internal team to test new ways of using security footage to enhance in-store marketing.", + "length": 165 + }, + { + "text": "Surveillance: A coffee shops in San Francisco, The Creamery, uses footage from security cameras to study shoppers' movements in order to improve their marketing and services .", + "length": 175 + }, + { + "text": "Secret tools: According to research and surveillance specialists, Walmart has employed an internal team to test new ways of using security footage to enhance in-store marketing .", + "length": 178 + }, + { + "text": 
"Prism Skylabs software collects security camera data to reveal where and how people linger, which products are most popular, and the aisles people browse in the most and for how long .", + "length": 184 + }, + { + "text": "From a small coffee shop in San Francisco, to American Apparel and Walmart, retailers are using footage from security cameras to study shoppers’ movements in order to improve their marketing.", + "length": 193 + }, + { + "text": "But because retailers technically use this footage for security, they are under no obligation to ask customers for permission to record them, or even disclose the fact they are watching them via video.", + "length": 201 + }, + { + "text": "According to MarketsandMarkets, a global market research company, this kind of video surveillance as a service, which is also called VSaaS, is expected to grow in worth from $474 million in 2011 to nearly $2.", + "length": 208 + }, + { + "text": "Pam Dixon, executive director of the World Privacy Forum, said that while online shoppers are aware their purchasing habits will be tracked on the Internet, people have an expectation of privacy in brick-and-mortar stores.", + "length": 222 + }, + { + "text": "Marshal Cohen, chief retail analyst of The NPD Group, told Ad Week he used to employ security videos to watch shoppers as they left an escalator, wanting to discover which way they turned so he knew where to place displays.", + "length": 223 + }, + { + "text": "According to Ad Week, T-Mobile uses surveillance data to decide on the design of its stores, while American Apparel analyses its security footage figure out how best capture the most shopper attention with its clothing displays.", + "length": 228 + }, + { + "text": "' While a store's surveillance-turned-marketing footage is not used to identify specific shoppers (Prism hides customers’ identities by blurring their faces or only using their heat paths), it seems retailers want to keep this growing crossover camera practice 
quiet.", + "length": 269 + }, + { + "text": "As security cameras have grown more sophisticated, technology like the Prism Skylabs software, collects security camera data to reveal foot traffic, line formations, where and how people linger, which products are most popular, and the aisles people browse in the most and for how long.", + "length": 286 + }, + { + "text": "'A mobile security video lets me stand above the action, and read the crowd no matter where I am' Prism Skylabs uses security footage to create stop-motion time studies and heat maps to show customer movements, and which items customers touch or pick-up most through the day with a red, .", + "length": 288 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.49470700323581696 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:23.909841841Z", + "first_section_created": "2025-12-23T09:34:23.910170154Z", + "last_section_published": "2025-12-23T09:34:23.910476967Z", + "all_results_received": "2025-12-23T09:34:23.99417844Z", + "output_generated": "2025-12-23T09:34:23.994388949Z", + "total_processing_time_ms": 84, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 83, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:23.910170154Z", + "publish_time": "2025-12-23T09:34:23.910371062Z", + "first_worker_start": "2025-12-23T09:34:23.911020189Z", + "last_worker_end": "2025-12-23T09:34:23.993522Z", + "total_journey_time_ms": 83, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:23.910942685Z", + "start_time": "2025-12-23T09:34:23.911020189Z", + "end_time": "2025-12-23T09:34:23.911130393Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:23.911188Z", + "start_time": "2025-12-23T09:34:23.911346Z", + "end_time": 
"2025-12-23T09:34:23.993522Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 82 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:23.911093292Z", + "start_time": "2025-12-23T09:34:23.911161094Z", + "end_time": "2025-12-23T09:34:23.911344402Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:23.91104749Z", + "start_time": "2025-12-23T09:34:23.911115692Z", + "end_time": "2025-12-23T09:34:23.911172295Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:23.910414464Z", + "publish_time": "2025-12-23T09:34:23.910476967Z", + "first_worker_start": "2025-12-23T09:34:23.911126993Z", + "last_worker_end": "2025-12-23T09:34:23.990576Z", + "total_journey_time_ms": 80, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:23.911185895Z", + "start_time": "2025-12-23T09:34:23.911215196Z", + "end_time": "2025-12-23T09:34:23.911233197Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:23.911477Z", + "start_time": "2025-12-23T09:34:23.911594Z", + "end_time": "2025-12-23T09:34:23.990576Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 78 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:23.911174695Z", + "start_time": "2025-12-23T09:34:23.911218397Z", + "end_time": "2025-12-23T09:34:23.911250698Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:23.911090891Z", + "start_time": "2025-12-23T09:34:23.911126993Z", + "end_time": "2025-12-23T09:34:23.911140293Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + 
"total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 160, + "min_processing_ms": 78, + "max_processing_ms": 82, + "avg_processing_ms": 80, + "median_processing_ms": 82, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3062, + "slowest_section_id": 0, + "slowest_section_time_ms": 83 + } +} diff --git a/data/output/0041a0595eac3894d84e785b37f430dc951ac4e9.json b/data/output/0041a0595eac3894d84e785b37f430dc951ac4e9.json new file mode 100644 index 0000000..e9dc27e --- /dev/null +++ b/data/output/0041a0595eac3894d84e785b37f430dc951ac4e9.json @@ -0,0 +1,234 @@ +{ + "file_name": "0041a0595eac3894d84e785b37f430dc951ac4e9.txt", + "total_words": 313, + "top_n_words": [ + { + "word": "the", + "count": 17 + }, + { + "word": "to", + "count": 11 + }, + { + "word": "s", + "count": 8 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "mannone", + "count": 7 + }, + { + "word": "on", + "count": 7 + }, + { + "word": "sunderland", + "count": 6 + }, + { + "word": "for", + "count": 5 + }, + { + "word": "in", + "count": 5 + }, + { + "word": "is", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "4 miles and would take 11 hours .", + "length": 33 + }, + 
{ + "text": "VIDEO An unbelievable result - Koeman .", + "length": 39 + }, + { + "text": "Let’s take it on the chin and go again.", + "length": 41 + }, + { + "text": "‘We lost 8-0 but we didn’t lose eight games 1-0.", + "length": 52 + }, + { + "text": "’ Three of Southampton’s goals were gifted by Sunderland.", + "length": 61 + }, + { + "text": "The round trip from Sunderland to Southampton and back is 653.", + "length": 62 + }, + { + "text": "‘It is difficult for us but it’s very difficult for them as well.", + "length": 69 + }, + { + "text": "Manager Gus Poyet claimed the defeat was his most embarrassing moment in football.", + "length": 82 + }, + { + "text": "‘After we conceded we threw in the towel and I include myself,’ admitted Mannone.", + "length": 85 + }, + { + "text": "Sunderland goalkeeper Vito Mannone wants his team-mates to refund travelling supporters .", + "length": 89 + }, + { + "text": "’ Ronald Koeman's Southampton side continued their impressive run with a dominant victory .", + "length": 93 + }, + { + "text": "Mannone's Sunderland side were thrashed 8-0 by Southampton on a miserable afternoon on the south coast .", + "length": 104 + }, + { + "text": "‘I will talk to the team to see if it is possible to pay their tickets and their trip,’ Mannone said.", + "length": 105 + }, + { + "text": "The squad had the day off on Sunday with Poyet wanting to allow the dust to settle before addressing them.", + "length": 106 + }, + { + "text": "Santiago Vergini set the tone for the afternoon by smashing the ball into his own net with a comical own goal .", + "length": 111 + }, + { + "text": "Some fans were in tears as Sunderland were humiliated 8-0 — one of the heaviest defeats in Barclays Premier League history.", + "length": 125 + }, + { + "text": "When they meet for training on Monday, Mannone is intent on arranging compensation for fans who made the 650-mile round trip to St Mary’s.", + "length": 140 + }, + { + "text": "Sunderland goalkeeper Vito Mannone 
is to ask his team-mates to refund the money spent on tickets and travel by fans who saw their team’s capitulation against Southampton.", + "length": 172 + }, + { + "text": "Liam Bridcutt and Santiago Vergini — with what will surely go down as own goal of the season — put the ball in their own net while Mannone’s poor clearance allowed Dusan Tadic to score past him.", + "length": 200 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.9223979711532593 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:24.411219047Z", + "first_section_created": "2025-12-23T09:34:24.411515659Z", + "last_section_published": "2025-12-23T09:34:24.411691967Z", + "all_results_received": "2025-12-23T09:34:24.473332751Z", + "output_generated": "2025-12-23T09:34:24.473483457Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:24.411515659Z", + "publish_time": "2025-12-23T09:34:24.411691967Z", + "first_worker_start": "2025-12-23T09:34:24.412260389Z", + "last_worker_end": "2025-12-23T09:34:24.472472Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:24.412192487Z", + "start_time": "2025-12-23T09:34:24.412260389Z", + "end_time": "2025-12-23T09:34:24.412302791Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:24.412431Z", + "start_time": "2025-12-23T09:34:24.412582Z", + "end_time": "2025-12-23T09:34:24.472472Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:24.412237089Z", + "start_time": "2025-12-23T09:34:24.412311792Z", + "end_time": "2025-12-23T09:34:24.412359393Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:24.412251389Z", + "start_time": "2025-12-23T09:34:24.412306091Z", + "end_time": "2025-12-23T09:34:24.412330092Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1784, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/0041e52566b4873f3b7f8ccdcce6af9b2ffbbca6.json b/data/output/0041e52566b4873f3b7f8ccdcce6af9b2ffbbca6.json new file mode 100644 index 0000000..3958081 --- /dev/null +++ b/data/output/0041e52566b4873f3b7f8ccdcce6af9b2ffbbca6.json @@ -0,0 +1,724 @@ +{ + "file_name": "0041e52566b4873f3b7f8ccdcce6af9b2ffbbca6.txt", + "total_words": 1496, + "top_n_words": [ + { + "word": "the", + "count": 98 + }, + { + "word": "in", + "count": 33 + }, + { + "word": "s", + "count": 27 + }, + { + 
"word": "and", + "count": 25 + }, + { + "word": "restaurant", + "count": 23 + }, + { + "word": "a", + "count": 22 + }, + { + "word": "has", + "count": 21 + }, + { + "word": "to", + "count": 18 + }, + { + "word": "his", + "count": 17 + }, + { + "word": "of", + "count": 17 + } + ], + "sorted_sentences": [ + { + "text": "M.", + "length": 2 + }, + { + "text": "O.", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "6 D.", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "Waitrose.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "40 Combal.", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Becky Evans .", + "length": 13 + }, + { + "text": "It's frustrating.", + "length": 17 + }, + { + "text": "to second place .", + "length": 17 + }, + { + "text": "Eye off the ball?", + "length": 17 + }, + { + "text": "São Paulo, Brazil .", + "length": 20 + }, + { + "text": "Zero Rivoli, Italy .", + "length": 20 + }, + { + "text": "Pellegrino and Acqua .", + "length": 22 + }, + { + "text": "regarded \"gastronomes\".", + "length": 23 + }, + { + "text": "15 Alinea Chicago, USA .", + "length": 24 + }, + { + "text": "29 Daniel New York, USA .", + "length": 25 + }, + { + "text": "11 Per Se New York, USA .", + "length": 25 + }, + { + "text": "22:35 EST, 29 April 2013 .", + "length": 26 + }, + { + "text": "05:27 EST, 30 April 2013 .", + "length": 26 + }, + { + "text": "49 Septime Paris, France .", + "length": 26 + }, + { + "text": "20 Narisawa Tokyo, Japan .", + "length": 26 + }, + { + "text": "33 The Fat Duck Bray, UK .", + "length": 26 + }, + { + "text": "32 Nahm Bangkok, Thailand .", + "length": 27 + }, + { + "text": "48 Quay Sydney, Australia .", + "length": 27 + }, + { + "text": "28 Mirazur Menton, France .", + "length": 27 + }, + { + "text": "36 Amber Hong Kong, China .", + "length": 27 + 
}, + { + "text": "52 Manresa Los Gatos, USA .", + "length": 27 + }, + { + "text": "13 The Ledbury London, UK .", + "length": 27 + }, + { + "text": "30 Aqua Wolfsburg, Germany .", + "length": 28 + }, + { + "text": "2 Noma Copenhagen, Denmark .", + "length": 28 + }, + { + "text": "46 Mani São Paulo, Brazil .", + "length": 28 + }, + { + "text": "34 Fäviken Järpen, Sweden .", + "length": 29 + }, + { + "text": "On the back of its success, .", + "length": 29 + }, + { + "text": "31 Biko Mexico City, Mexico .", + "length": 29 + }, + { + "text": "41 Piazza Duomo Alba, Italy .", + "length": 29 + }, + { + "text": "27 Le Calandre Rubano, Italy .", + "length": 30 + }, + { + "text": "16 L’Arpège Paris, France .", + "length": 30 + }, + { + "text": "9 Steirereck Vienna, Austria .", + "length": 30 + }, + { + "text": "17 Pujol Mexico City, Mexico .", + "length": 30 + }, + { + "text": "23 L’Astrance Paris, France .", + "length": 31 + }, + { + "text": "19 Le Bernardin New York, USA .", + "length": 31 + }, + { + "text": "8 Arzak San Sebastián, Spain .", + "length": 31 + }, + { + "text": "38 Restaurant Andre Singapore .", + "length": 31 + }, + { + "text": "14 Astrid y Gastón Lima, Peru .", + "length": 32 + }, + { + "text": "21 Attica Melbourne, Australia .", + "length": 32 + }, + { + "text": "35 Oud Sluis Sluis, Netherlands .", + "length": 33 + }, + { + "text": "45 Geranium Copenhagen, Denmark .", + "length": 33 + }, + { + "text": "26 Quique Dacosta Dénia, Spain .", + "length": 33 + }, + { + "text": "4 Mugaritz San Sebastián, Spain .", + "length": 34 + }, + { + "text": "43 Mr \u0026 Mrs Bund Shanghai, China .", + "length": 34 + }, + { + "text": "37 Vila Joya Albufeira, Portugal .", + "length": 34 + }, + { + "text": "22 Nihonryori RyuGin Tokyo, Japan .", + "length": 35 + }, + { + "text": "18 Le Chateaubriand Paris, France .", + "length": 35 + }, + { + "text": "44 Asador Etxebarri Atxondo, Spain .", + "length": 36 + }, + { + "text": "Blumenthal, who has three Michelin .", + "length": 36 + 
}, + { + "text": "5 Eleven Madison Park New York, USA .", + "length": 37 + }, + { + "text": "3 Osteria Francescana Modena, Italy .", + "length": 37 + }, + { + "text": "25 Hof Van Cleve Kruishoutem, Belgium .", + "length": 39 + }, + { + "text": "it into a multi-million pound business.", + "length": 39 + }, + { + "text": "47 The French Laundry Yountville, USA .", + "length": 39 + }, + { + "text": "1 El Celler de Can Roca Girona, Spain .", + "length": 39 + }, + { + "text": "10 Vendôme Bergisch Gladbach, Germany .", + "length": 40 + }, + { + "text": "39 8 1/2 Otto E Mezzo Bombana Hong Kong .", + "length": 41 + }, + { + "text": "7 Dinner by Heston Blumenthal London, UK .", + "length": 42 + }, + { + "text": "12 Frantzén/Lindeberg Stockholm, Sweden .", + "length": 42 + }, + { + "text": "24 L’Atelier Saint-Germain Paris, France .", + "length": 44 + }, + { + "text": "'It's exciting to see new people coming through.", + "length": 48 + }, + { + "text": "China also had three entries for the first time.", + "length": 48 + }, + { + "text": "The Ledbury, in London, run by head chef Brett .", + "length": 48 + }, + { + "text": "42 Schloss Schauenstein Fürstenau, Switzerland .", + "length": 49 + }, + { + "text": "World's 50 Best Restaurants Awards, sponsored by S.", + "length": 51 + }, + { + "text": "50 Central Lima, Peru51 Pierre Gagnaire Paris, France .", + "length": 55 + }, + { + "text": "Panna, were presented at the Guildhall in central London.", + "length": 57 + }, + { + "text": "He has published books and set up three other restaurants.", + "length": 58 + }, + { + "text": "with Channel 4, as well as signing a lucrative deal to promote .", + "length": 64 + }, + { + "text": "Scroll down for video and full list of the world's top restaurants .", + "length": 68 + }, + { + "text": "industry - including food critics, chefs, restaurateurs and highly .", + "length": 68 + }, + { + "text": "list was drawn from the votes of more than 900 leading figures in the .", + "length": 71 + }, + { 
+ "text": "Blumenthal has gone on to become a television star, signing a £1m deal .", + "length": 73 + }, + { + "text": "Rene Redzepi's (right) restaurant Noma, in Copenhagen, Denmark, dropped .", + "length": 73 + }, + { + "text": "Graham (left), was the only other British restaurant on the list, while .", + "length": 73 + }, + { + "text": "stars, set the restaurant up with his ex-wife Zanna and the pair turned .", + "length": 73 + }, + { + "text": "It was the restaurant that helped propel Heston Blumenthal to culinary stardom.", + "length": 79 + }, + { + "text": "Heston Blumenthal's Fat Duck has dropped to 33 in the world's best restaurant list .", + "length": 84 + }, + { + "text": "The self-taught was unable to cook without injections because of his seafood allergy.", + "length": 85 + }, + { + "text": "The chef told The Times: 'I think there's a point where you are almost part of the furniture.", + "length": 94 + }, + { + "text": "The Roca brothers' avant-garde eatery  now heads a top-ten lineup dominated by Spanish cuisine.", + "length": 96 + }, + { + "text": "The Knightsbridge restaurant that serves historic British food climbed two places to number seven.", + "length": 98 + }, + { + "text": "A chef who beat a severe food allergy has had his restaurant named as one of the best in the world.", + "length": 99 + }, + { + "text": "With so much on his plate, it's perhaps little surprise the Fat Duck has slipped down the rankings.", + "length": 99 + }, + { + "text": "Heston Blumenthal's restaurant Dinner, at Mandarin Oriental Hyde Park, climbed two places to seven .", + "length": 100 + }, + { + "text": "The chef said food at the Fat Duck, in Bray, Berkshire, is 70 per cent better now than it used to be .", + "length": 102 + }, + { + "text": "El Celler de Can Roca run by Joan Roca and his two brothers has been named the world's best restaurant .", + "length": 104 + }, + { + "text": "A number of high-quality dishes with his name - including his Christmas pudding - are 
sold in Waitrose .", + "length": 104 + }, + { + "text": "The Ledbury in London, run by Australian Brett Graham, was the only other British entry on the list at 13.", + "length": 106 + }, + { + "text": "The Fat Duck was voted the best in in the world in 2005 and the set menu now costs diners £195 per person.", + "length": 107 + }, + { + "text": "Heston Blumenthal's The Fat Duck has been skewered as it drops 20 places in the world's top ten restaurants.", + "length": 108 + }, + { + "text": "The desserts were so popular that 18 months ago they were selling for £200 on eBay as stores ran out of them.", + "length": 110 + }, + { + "text": "Commercial interests: The chef, famed for his iconic eyewear, opens a Vision Express store in central London .", + "length": 110 + }, + { + "text": "However, the 2013 World's 50 Best Restaurants list - billed as the Oscars of the dining world- ranked it at 33.", + "length": 111 + }, + { + "text": "Blumenthal said while he was pleased to see the emergence of new talent, he admitted frustration at the result.", + "length": 111 + }, + { + "text": "Blumenthal added that the restaurant's three Michelin stars was a more important guide to the restaurant's quality.", + "length": 115 + }, + { + "text": "France and America were the two countries with the most restaurants on the list, both notching up six in the top 50.", + "length": 116 + }, + { + "text": "' He said the food served at The Fat Duck, formally named as the world's best restaurant, is 70 per cent better now.", + "length": 116 + }, + { + "text": "It was a bad day for British restaurants as the two Blumenthal establishments made up just three entries from the UK.", + "length": 117 + }, + { + "text": "Just 14 months later he was awarded a Michelin star - and now his restaurant has been named the 70th best in the world.", + "length": 119 + }, + { + "text": "Heston's Fat Duck restaurant has slipped down the pecking order after being world number one as 'brand Heston' has grown.", + 
"length": 121 + }, + { + "text": "With such commercial success, it will raise the question that Heston has taken the eye off the ball at his top restaurant.", + "length": 122 + }, + { + "text": "Over the years he has become a familiar face on television as his success in the kitchen has propelled him to national fame.", + "length": 124 + }, + { + "text": "Under his name, the upmarket supermarket have launched some high quality products - including orange-filled Christmas puddings.", + "length": 127 + }, + { + "text": "But after starting on a new diet the Swedish-born chef overcame eczema and asthma and opened Hedone, in Chiswick, London in 2011.", + "length": 129 + }, + { + "text": "Television sports presenter Jim Rosenthal, who was eating at the restaurant with his wife for her 58th birthday, was among the diners to fall ill.", + "length": 146 + }, + { + "text": "However, in 2009 it closed down for two weeks after it suffered from the largest ever recorded norovirus outbreak with over 400 diners falling unwell.", + "length": 150 + }, + { + "text": "Spanish restaurant El Celler de Can Roca, which has featured caramelised olives served on a bonsai tree on its menu, has been named the best in the world.", + "length": 154 + }, + { + "text": "Blumenthal's disappointment at the 20 place drop for The Fat Duck will probably have been lessened by a climb up the rankings for his latest venture, Dinner.", + "length": 157 + }, + { + "text": "Early days: Heston Blumenthal in 2002 at the Fat Duck as he presented 'Kitchen Chemistry' on the Discovery Channel where he explained the science of cooking .", + "length": 158 + }, + { + "text": "The restaurant in Bray, Berkshire, famed for snail porridge and mustard ice cream, has spent most of the past decade in the top five restaurants in the world.", + "length": 158 + }, + { + "text": "British-based Mikael Jonsson (pictured) initially trained as a solicitor because his condition made it impractical to follow his dream of opening a 
restaurant.", + "length": 159 + }, + { + "text": "The family run restaurant in Girona, north east Spain, toppled Denmark's Noma from its position to claim the title after spending two years as runner-up on the list.", + "length": 165 + }, + { + "text": "The restaurant, famous for serving unusual astronomic dishes such as snail porridge and egg and bacon ice cream was founded in 1995 and has spent ten years in the top 50 list.", + "length": 175 + }, + { + "text": "Eight years after topping the list of the world’s best eateries, the Fat Duck in Bray, Berkshire, last night plummeted down the league table to 33rd place - down from 13th last year.", + "length": 184 + }, + { + "text": "El Celler de Can Roca, run by the three Roca siblings, is well-known for its free-style cooking and has long been hailed for its combination of Catalan dishes and cutting edge techniques.", + "length": 187 + }, + { + "text": "In November last year, two chefs working at the restaurant were killed when a double-decker bus ploughed into the taxi in which they were travelling during a promotional tour of Hong Kong.", + "length": 188 + }, + { + "text": "With Joan Roca heading up the kitchen and his brothers Jordi and Josep as head pastry chef and head sommelier respectively, it has built up a reputation as one of Spain's most exciting places to eat.", + "length": 199 + }, + { + "text": "Dishes that gave the Fat Duck fame: (Clockwise from top left) Snail porridge, which became so well known it was almost Heston's signature dish; Pigeon with Pistachio; radish, ravioli and oyster; and Quail Jelly .", + "length": 212 + }, + { + "text": "Positions two and three on this year's list were taken by chef Rene Redzepi's Noma, in Copenhagen, which has famously served moss, lichen and bone marrow, and Massimo Bottura's comparatively low-key Osteria Francescana in Modena, northern Italy.", + "length": 245 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.4304288178682327 + }, + 
"name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:24.912480949Z", + "first_section_created": "2025-12-23T09:34:24.913960609Z", + "last_section_published": "2025-12-23T09:34:24.914296622Z", + "all_results_received": "2025-12-23T09:34:25.018114906Z", + "output_generated": "2025-12-23T09:34:25.018924939Z", + "total_processing_time_ms": 106, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 103, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:24.913960609Z", + "publish_time": "2025-12-23T09:34:24.914164817Z", + "first_worker_start": "2025-12-23T09:34:24.91473644Z", + "last_worker_end": "2025-12-23T09:34:25.013237Z", + "total_journey_time_ms": 99, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:24.914711439Z", + "start_time": "2025-12-23T09:34:24.914793842Z", + "end_time": "2025-12-23T09:34:24.914912847Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:24.915009Z", + "start_time": "2025-12-23T09:34:24.915146Z", + "end_time": "2025-12-23T09:34:25.013237Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 98 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:24.914799943Z", + "start_time": "2025-12-23T09:34:24.914873346Z", + "end_time": "2025-12-23T09:34:24.91497055Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:24.914671938Z", + "start_time": "2025-12-23T09:34:24.91473644Z", + "end_time": "2025-12-23T09:34:24.914782342Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:24.914187618Z", + "publish_time": "2025-12-23T09:34:24.914296622Z", + "first_worker_start": "2025-12-23T09:34:24.914829744Z", + 
"last_worker_end": "2025-12-23T09:34:25.017373Z", + "total_journey_time_ms": 103, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:24.914821044Z", + "start_time": "2025-12-23T09:34:24.914877946Z", + "end_time": "2025-12-23T09:34:24.91497515Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:24.915064Z", + "start_time": "2025-12-23T09:34:24.915205Z", + "end_time": "2025-12-23T09:34:25.017373Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 102 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:24.914827644Z", + "start_time": "2025-12-23T09:34:24.914869145Z", + "end_time": "2025-12-23T09:34:24.914959249Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:24.914777442Z", + "start_time": "2025-12-23T09:34:24.914829744Z", + "end_time": "2025-12-23T09:34:24.914866845Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 200, + "min_processing_ms": 98, + "max_processing_ms": 102, + "avg_processing_ms": 100, + "median_processing_ms": 102, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + 
"total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4370, + "slowest_section_id": 1, + "slowest_section_time_ms": 103 + } +} diff --git a/data/output/0041f3300bab9974833a5baf13a1e5e525d6d6f8.json b/data/output/0041f3300bab9974833a5baf13a1e5e525d6d6f8.json new file mode 100644 index 0000000..2c72b48 --- /dev/null +++ b/data/output/0041f3300bab9974833a5baf13a1e5e525d6d6f8.json @@ -0,0 +1,270 @@ +{ + "file_name": "0041f3300bab9974833a5baf13a1e5e525d6d6f8.txt", + "total_words": 396, + "top_n_words": [ + { + "word": "the", + "count": 32 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "on", + "count": 10 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "for", + "count": 7 + }, + { + "word": "scooter", + "count": 6 + }, + { + "word": "was", + "count": 6 + }, + { + "word": "by", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Becky Evans .", + "length": 13 + }, + { + "text": "05:57 EST, 29 April 2013 .", + "length": 26 + }, + { + "text": "03:44 EST, 29 April 2013 .", + "length": 26 + }, + { + "text": "the scooter battery almost dead.", + "length": 32 + }, + { + "text": "' Government policy rules on powered .", + "length": 38 + }, + { + "text": "speed limit of 8mph can be used on the road and the pavement.", + "length": 61 + }, + { + "text": "The man  is thought to have travelled at least five miles on the .", + "length": 67 + }, + { + "text": "wheelchairs and mobility scooters state that vehicles with an upper .", + "length": 69 + }, + { + "text": "Getting stuck behind a caravan or 
a tractor can be frustrating enough.", + "length": 70 + }, + { + "text": "battery-powered machine before he was found by police slowly heading .", + "length": 70 + }, + { + "text": "north on the main carriageway of the A23 towards Gatwick Airport with .", + "length": 71 + }, + { + "text": "'When on the pavement you should follow the guidance and rules for pedestrians.", + "length": 79 + }, + { + "text": "'He's tied his dog with a piece of rope to the disabled buggy and was taking it for walkies.", + "length": 92 + }, + { + "text": "The dog-owner was pictured exercising his pet by tying it to the back of a mobility scooter .", + "length": 93 + }, + { + "text": "Cars were having to slam on their brakes and overtake, where are the police when you want them.", + "length": 95 + }, + { + "text": "However, the rules state: 'When you are on the road you should obey the guidance and rules for other vehicles.", + "length": 110 + }, + { + "text": "Earlier this month a confused tourist was seen driving the wrong way along a 70mph dual carriageway on a buggy .", + "length": 112 + }, + { + "text": "The confused tourist was spotted by concerned drivers as he rode at 8mph along the 70mph A27 and A23 near Brighton, Sussex.", + "length": 123 + }, + { + "text": "One witness said: 'How is the man in this disabled scooter allowed to drive in the middle of a busy road taking his dog for a walk.", + "length": 131 + }, + { + "text": "The green buggy was pictured by one frustrated motorist driving through the village of Corfe Castle, Dorset, where the speed limit is 30mph.", + "length": 140 + }, + { + "text": "But spare a thought for the drivers who were following this man 'walking' his dog through a village in his mobility scooter with a top speed of 8mph.", + "length": 149 + }, + { + "text": "' But despite the perceived dangers, police confirmed that it is legal to drive a mobility scooter on the road and said no offence had been committed.", + "length": 150 + }, + { + "text": "A 
spokesman for Dorset Police said: 'Obviously it would be advisable for them to use the pavement, but the photo doesn't actually show any road related offences.", + "length": 161 + }, + { + "text": "' Earlier this month, police went to the rescue of a 62-year-old man on a mobility scooter after he ended up driving the wrong way along the hard shoulder of a busy dual carriageway.", + "length": 182 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4801985025405884 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:25.414607286Z", + "first_section_created": "2025-12-23T09:34:25.414946999Z", + "last_section_published": "2025-12-23T09:34:25.415125107Z", + "all_results_received": "2025-12-23T09:34:25.485842957Z", + "output_generated": "2025-12-23T09:34:25.486016264Z", + "total_processing_time_ms": 71, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 70, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:25.414946999Z", + "publish_time": "2025-12-23T09:34:25.415125107Z", + "first_worker_start": "2025-12-23T09:34:25.415799634Z", + "last_worker_end": "2025-12-23T09:34:25.484895Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:25.415771233Z", + "start_time": "2025-12-23T09:34:25.415844436Z", + "end_time": "2025-12-23T09:34:25.415891938Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:25.415915Z", + "start_time": "2025-12-23T09:34:25.416059Z", + "end_time": "2025-12-23T09:34:25.484895Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 68 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:25.415722031Z", + "start_time": "2025-12-23T09:34:25.415799634Z", + "end_time": "2025-12-23T09:34:25.415842036Z", + "queue_wait_time_ms": 
0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:25.415773833Z", + "start_time": "2025-12-23T09:34:25.415846836Z", + "end_time": "2025-12-23T09:34:25.415867837Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 68, + "min_processing_ms": 68, + "max_processing_ms": 68, + "avg_processing_ms": 68, + "median_processing_ms": 68, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2213, + "slowest_section_id": 0, + "slowest_section_time_ms": 69 + } +} diff --git a/data/output/0042de86fd04a89719fbde32be64906b95344dc8.json b/data/output/0042de86fd04a89719fbde32be64906b95344dc8.json new file mode 100644 index 0000000..262448f --- /dev/null +++ b/data/output/0042de86fd04a89719fbde32be64906b95344dc8.json @@ -0,0 +1,444 @@ +{ + "file_name": "0042de86fd04a89719fbde32be64906b95344dc8.txt", + "total_words": 1204, + "top_n_words": [ + { + "word": "the", + "count": 91 + }, + { + "word": "in", + "count": 38 + }, + { + "word": "it", + "count": 35 + }, + { 
+ "word": "of", + "count": 24 + }, + { + "word": "a", + "count": 23 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "boat", + "count": 17 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "for", + "count": 16 + }, + { + "word": "is", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "It .", + "length": 4 + }, + { + "text": "Taylor, who lives in West London.", + "length": 33 + }, + { + "text": "I thought \"this could be quite special\".", + "length": 40 + }, + { + "text": "Eventually the technology trickled down to boats.", + "length": 49 + }, + { + "text": "Rock bottom: The Fixitor in its pre-renovated state .", + "length": 53 + }, + { + "text": "'It's very typical of the boats from the early 1900s.", + "length": 53 + }, + { + "text": "Fixitor's build matches those used in the first event.", + "length": 54 + }, + { + "text": "It was not long before he realised the ship's potential.", + "length": 56 + }, + { + "text": "He said: 'I looked at it and realised it was an interesting hull.", + "length": 65 + }, + { + "text": "It now has an estimated value 1,000 times that amount at £400,000.", + "length": 67 + }, + { + "text": "bought it in July 2009 at the Traditional Thames Boat Rally for Mr .", + "length": 68 + }, + { + "text": "then remained in the yard until Classic Restoration Services of Windsor .", + "length": 73 + }, + { + "text": "'I've never seen anything that old restored to such a beautiful standard.", + "length": 73 + }, + { + "text": "Today, 'Fixitor' is fitted with a 1950's MG 1500cc 4 cylinder sports car engine.", + "length": 80 + }, + { + "text": "'The Americans are fanatical about this sort of boat, but this is one of the best.", + "length": 82 + }, + { + "text": "'It has no flat surfaces on the hull or on the deck and this helps give us the date.", + "length": 84 + }, + { + "text": "According to Mr Messer, the most valuable are those built before the First World War.", + "length": 85 + }, + { + "text": "It also had a 
lightweight construction, a detail which indicated it is a racing boat.", + "length": 85 + }, + { + "text": "Three commemorative plaques are now screwed to the dashboard to mark this special achievement.", + "length": 94 + }, + { + "text": "' In its heyday: The Fixitor was built with a classic state-of-the-art 1900 racing yacht design .", + "length": 97 + }, + { + "text": "Mr Messer said: 'I have been quoted by American experts that it could make half a million dollars.", + "length": 98 + }, + { + "text": "Her late husband Michael, who named it Fixitor, bought it in 1949 from an army officer stationed abroad.", + "length": 104 + }, + { + "text": "Hull lot of love: The Fixitor would have disintegrated further without the much-needed renovation work .", + "length": 104 + }, + { + "text": "Back then the boat had been sitting in a garden in Fleet for at least ten years and was without an engine.", + "length": 106 + }, + { + "text": "Mike James, 63, Commodore for the Classic Offshore Powerboat Club, said: 'The boat is absolutely stunning.", + "length": 106 + }, + { + "text": "Its condition has deteriorated massively in comparison to the Fixitor, and it no longer sits on the water.", + "length": 106 + }, + { + "text": "Michael named her Fixitor after an old Navy joke - \"fix it or leave it\" - because it required so much work.", + "length": 107 + }, + { + "text": "' Sporting history: During renovation, it was discovered that the boat had taken part in the 1908 Olympics .", + "length": 108 + }, + { + "text": "' The figure derives from price comparisons with other wooden motorboats being sold at North American auctions.", + "length": 111 + }, + { + "text": "The distinctive curved 'Art Nouveau' structure of the hull is typical of early English racers used in the event.", + "length": 112 + }, + { + "text": "Another early powerboat from 1912 is still on the water on the Isle of Wight, but lacks the history of  Fixitor.", + "length": 113 + }, + { + "text": "Shipshape: The boat 
is back to its former glory, but at one time it lay neglected in a Thames boatyard for 14 years .", + "length": 117 + }, + { + "text": "Ship ahoy: The Fixitor was bought by collector Wint Taylor for £400 in 2009 and after renovation is worth £400,000 .", + "length": 118 + }, + { + "text": "'It also has holes for tow ropes, which is a sign of an Olympic trial boat as these were the only ones which had them.", + "length": 118 + }, + { + "text": "Stephen Messer, a boatbuilder for Classic Restoration Services, spotted the vessel near the company's exhibition tent.", + "length": 118 + }, + { + "text": "And a similar French motorboat from the same era is held at the Maine Maritime Museum in Bath, USA, but no longer floats.", + "length": 121 + }, + { + "text": "The couple would cruise on inland waterways and the upper Thames and together they owned her until selling Fixitor in 1995.", + "length": 123 + }, + { + "text": "Mr Taylor didn't disclose how much he had spent on the restoration, but says it was worth every penny given its current value.", + "length": 126 + }, + { + "text": "The wooden 'trials boat', which was snapped up for a mere £400 four years ago, has been lovingly restored to its former glory.", + "length": 127 + }, + { + "text": "Further evidence confirms it was also used in the 1908 Olympics - the only time power boat racing has ever featured at the event.", + "length": 129 + }, + { + "text": "' The £400 purchase, from Michael Dennett's boatbuilders yard in Chertsey exhibiting at the rally, has proved a shrewd investment.", + "length": 131 + }, + { + "text": "' The British International Harmsworth Trophy still exists today for modern powerboats, but they now reach speeds of around 100mph.", + "length": 131 + }, + { + "text": "During Fixitor's restoration, traces of 'British Racing Green' paint were found underneath the hull - the colour used for the Games.", + "length": 132 + }, + { + "text": "Amazingly, it is also the only boat to appear in all three royal 
pageants marking the Queen's coronation, Silver and Diamond Jubilee.", + "length": 133 + }, + { + "text": "These were raced in the first-ever British International Harmsworth Trophy in 1903, which pitted wooden powerboats against each other.", + "length": 134 + }, + { + "text": "Mr Messer said: 'They described it to me and said it was in deep storage, which I think basically means it is a pile of wood on the floor.", + "length": 138 + }, + { + "text": "He said: 'Hard chine hulls rise up to 'plane' on the surface of the water, thereby reducing the boat's resistance and increasing its speed.", + "length": 139 + }, + { + "text": "The unreliable engine propellers were not replaced until the First World War, when they developed as lighter, efficient engines used in aircraft.", + "length": 145 + }, + { + "text": "The original seat position and special connection points for tow ropes located on board tied in with the appearance of early racing trials boats.", + "length": 145 + }, + { + "text": "Originally, the boat would have had a rather more basic petrol unit used in aircraft, but they were very heavy in comparison to their power output.", + "length": 147 + }, + { + "text": "The 25ft-long vessel, believed to have been built by boatmakers Thornycroft in 1902, was purchased by and restored for transport collector Wint Taylor.", + "length": 151 + }, + { + "text": "Mr Taylor, who is an engineer aged in his 60s, said: 'It's the oldest power boat in Europe - there are other boats around, but they're not still afloat.", + "length": 152 + }, + { + "text": "Mr Messer contacted the Classic Boat Museum on the Isle of Wight, which first identified the hull as a classic state of the art 1900 racing yacht design.", + "length": 153 + }, + { + "text": "Classic Offshore Powerboat club secretary Martin Napier said the curved design for the hull, also known as 'hard chine', was common for early racing boats.", + "length": 155 + }, + { + "text": "According to Mr Taylor, the clues were in 
the fixings on the mahogany hull planks, which were carefully doubled up, which is a sign of high quality craftmanship.", + "length": 161 + }, + { + "text": "Clues on board: Wint Taylor knew this was a high-quality boat by looking at the craftmanship and lightweight construction, which are indicative of a racing boat .", + "length": 162 + }, + { + "text": "The oldest motorboat still afloat in Europe is also the only one to feature in the three royal pageants marking the Queen's coronation, Silver and Diamond Jubilee .", + "length": 164 + }, + { + "text": "Rediscovered and restored, the Fixitor was reunited with former owner Ann Hawkins of Farnborough at this year's Henley rally, where it won the People's Choice Award.", + "length": 165 + }, + { + "text": "Built in the early 20th Century, it is one of the oldest motorboats in the world and its current engine means it reaches at speeds of up to 18mph - the same as its heyday.", + "length": 171 + }, + { + "text": "'Fixitor', named by previous owner Michael Hawkins in 1949 after joking to 'fix it or leave it', because it required so much restoration work back then, has a fascinating history.", + "length": 179 + }, + { + "text": "The oldest motorboat still afloat in Europe, which was used in the 1908 Olympics, at one time lay neglected in a Thames boatyard for 14 years - but it has now found its sea legs again after a major renovation.", + "length": 209 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.429956778883934 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:25.915404069Z", + "first_section_created": "2025-12-23T09:34:25.915753983Z", + "last_section_published": "2025-12-23T09:34:25.916123798Z", + "all_results_received": "2025-12-23T09:34:26.063248727Z", + "output_generated": "2025-12-23T09:34:26.063463136Z", + "total_processing_time_ms": 148, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 147, + "aggregation_wait_time_ms": 0, + 
"section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:25.915753983Z", + "publish_time": "2025-12-23T09:34:25.915990992Z", + "first_worker_start": "2025-12-23T09:34:25.916702321Z", + "last_worker_end": "2025-12-23T09:34:26.060367Z", + "total_journey_time_ms": 144, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:25.91666742Z", + "start_time": "2025-12-23T09:34:25.916735322Z", + "end_time": "2025-12-23T09:34:25.916819726Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:25.916992Z", + "start_time": "2025-12-23T09:34:25.917132Z", + "end_time": "2025-12-23T09:34:26.060367Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 143 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:25.916806325Z", + "start_time": "2025-12-23T09:34:25.917112238Z", + "end_time": "2025-12-23T09:34:25.917206741Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:25.916654319Z", + "start_time": "2025-12-23T09:34:25.916702321Z", + "end_time": "2025-12-23T09:34:25.916760623Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:25.916048295Z", + "publish_time": "2025-12-23T09:34:25.916123798Z", + "first_worker_start": "2025-12-23T09:34:25.916801325Z", + "last_worker_end": "2025-12-23T09:34:25.994458Z", + "total_journey_time_ms": 78, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:25.916848727Z", + "start_time": "2025-12-23T09:34:25.91692523Z", + "end_time": "2025-12-23T09:34:25.916960731Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:25.917283Z", + "start_time": 
"2025-12-23T09:34:25.917398Z", + "end_time": "2025-12-23T09:34:25.994458Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 77 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:25.916845227Z", + "start_time": "2025-12-23T09:34:25.917004133Z", + "end_time": "2025-12-23T09:34:25.917056435Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:25.916768624Z", + "start_time": "2025-12-23T09:34:25.916801325Z", + "end_time": "2025-12-23T09:34:25.916828826Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 220, + "min_processing_ms": 77, + "max_processing_ms": 143, + "avg_processing_ms": 110, + "median_processing_ms": 143, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3454, + "slowest_section_id": 0, + "slowest_section_time_ms": 144 + } +} diff --git a/data/output/0042ee63824a6617fbde2be8822aef7150d826c0.json b/data/output/0042ee63824a6617fbde2be8822aef7150d826c0.json new file mode 
100644 index 0000000..b342683 --- /dev/null +++ b/data/output/0042ee63824a6617fbde2be8822aef7150d826c0.json @@ -0,0 +1,270 @@ +{ + "file_name": "0042ee63824a6617fbde2be8822aef7150d826c0.txt", + "total_words": 658, + "top_n_words": [ + { + "word": "the", + "count": 38 + }, + { + "word": "and", + "count": 25 + }, + { + "word": "of", + "count": 25 + }, + { + "word": "to", + "count": 25 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "nhs", + "count": 13 + }, + { + "word": "in", + "count": 12 + }, + { + "word": "they", + "count": 11 + }, + { + "word": "was", + "count": 11 + }, + { + "word": "for", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "His partner Hancox also worked for the NHS.", + "length": 43 + }, + { + "text": "'I also want to stress this is by no means a victimless crime.", + "length": 62 + }, + { + "text": "'Their determined attempts to evade justice compound the crimes.", + "length": 64 + }, + { + "text": "Leigh was sentenced to 44 months and Hancox was given 24 months in prison.", + "length": 74 + }, + { + "text": "They also bought a brand new Jaguar convertible and a Mercedes from the profits of the scam.", + "length": 92 + }, + { + "text": "The couple also bought a brand new Jaguar convertible and a Mercedes from the profits of the scam .", + "length": 99 + }, + { + "text": "All suspicions of fraud reported to NHS Protect will be followed up, and investigated wherever appropriate.", + "length": 107 + }, + { + "text": "'This was not a get-rich-quick scheme - this was a sustained criminal enterprise stretching over seven years.", + "length": 109 + }, + { + "text": "Every penny they plundered is money that was diverted away from someone in desperate need of medical treatment.", + "length": 111 + }, + { + "text": "Seven years is a long time to see the error of your ways but these individuals showed no remorse for their actions.", + "length": 115 + }, + { + "text": "'I am delighted this case has finally been concluded and these con 
artists have been exposed and brought to justice.", + "length": 116 + }, + { + "text": "'We press for prosecution of offenders and seek the strongest possible sanctions, so public money is not diverted from patient care.", + "length": 132 + }, + { + "text": "It is the NHS they scammed out of thousands of pounds - money which the NHS badly needs for the treatment of people with genuine illness.", + "length": 137 + }, + { + "text": "They sparked a five-year manhunt after being interviewed by police about the fraud in 2008, before fleeing to the Turkish part of Cyprus.", + "length": 137 + }, + { + "text": "Leigh worked for the NHS in purchasing and was responsible for buying mostly computers and other office equipment for various NHS departments.", + "length": 142 + }, + { + "text": "'In fact, we believe the very reason they switched companies later on was to cover up what they had done should there ever be an investigation.", + "length": 143 + }, + { + "text": "The pair also used Bibi's IT Solutions Ltd and Wiscom Technology Ltd as a front to disguise their ongoing fraud between January 2001 and October 2008.", + "length": 150 + }, + { + "text": "It was only when they moved to the south part of the island, the Republic of Cyprus, that the couple were brought back to England last year to face justice.", + "length": 156 + }, + { + "text": "We also seized hundreds of email exchanges which showed they went to great lengths to cover their tracks by cooking the books to give them an air of authenticity.", + "length": 162 + }, + { + "text": "A couple who swindled £1million from the NHS to fund their luxury lifestyle of globetrotting and flashy cars have been jailed for a total of five-and-a-half years.", + "length": 164 + }, + { + "text": "At Manchester Crown Court today the pair were jailed after pleading guilty to conspiracy to defraud and conspiracy to conceal criminal property at an earlier hearing.", + "length": 166 + }, + { + "text": "Sgt Laura Walters, of GMP, said: 
'This couple were involved in an well-orchestrated and meticulously planned conspiracy to defraud the NHS out of hundreds of thousands of pounds.", + "length": 178 + }, + { + "text": "John Leigh, 54, and Deborah Hancox, 45, from Rochdale, Greater Manchester, were involved in a seven-year conspiracy supplying equipment and services to the NHS at inflated prices.", + "length": 179 + }, + { + "text": "It is thought the value of the fraud was more than £1million, with the couple pocketing about £300,000 which they used to buy holiday homes in Cyprus, Dubai and the Lake District.", + "length": 181 + }, + { + "text": "' Sue Frith, managing director of NHS Protect, said: 'This was a serious fraud against the NHS, cynically carried out by two individuals abusing their positions of trust and authority.", + "length": 184 + }, + { + "text": "John Leigh, 54, (left) and Deborah Hancox, 45, from Rochdale, Greater Manchester, were involved in a seven-year conspiracy supplying equipment and services to the NHS at inflated prices .", + "length": 187 + }, + { + "text": "'It is also clear they used the proceeds of their criminal activity to live a lavish lifestyle - the couple had a holiday home in the Lake District and they also invested in property in the United Arab Emirates and Cyprus.", + "length": 222 + }, + { + "text": "For more than seven years, Leigh used a company called Action Direct Technology Ltd - of which his partner Hancox was the owner, sole shareholder and company director - to buy overpriced goods and defraud his employer the NHS.", + "length": 226 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6941009163856506 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:26.416353158Z", + "first_section_created": "2025-12-23T09:34:26.416705872Z", + "last_section_published": "2025-12-23T09:34:26.416968483Z", + "all_results_received": "2025-12-23T09:34:26.496602992Z", + "output_generated": 
"2025-12-23T09:34:26.496807Z", + "total_processing_time_ms": 80, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 79, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:26.416705872Z", + "publish_time": "2025-12-23T09:34:26.416968483Z", + "first_worker_start": "2025-12-23T09:34:26.417501804Z", + "last_worker_end": "2025-12-23T09:34:26.495648Z", + "total_journey_time_ms": 78, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:26.417499204Z", + "start_time": "2025-12-23T09:34:26.417560006Z", + "end_time": "2025-12-23T09:34:26.417618909Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:26.41777Z", + "start_time": "2025-12-23T09:34:26.417929Z", + "end_time": "2025-12-23T09:34:26.495648Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 77 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:26.4174018Z", + "start_time": "2025-12-23T09:34:26.417501804Z", + "end_time": "2025-12-23T09:34:26.417578007Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:26.417536506Z", + "start_time": "2025-12-23T09:34:26.417612109Z", + "end_time": "2025-12-23T09:34:26.417666811Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 77, + "min_processing_ms": 77, + "max_processing_ms": 77, + "avg_processing_ms": 77, + "median_processing_ms": 77, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3865, + "slowest_section_id": 0, + "slowest_section_time_ms": 78 + } +} diff --git a/data/output/004315e86f1eb5ec9c3aa6ee439eedf755862371.json b/data/output/004315e86f1eb5ec9c3aa6ee439eedf755862371.json new file mode 100644 index 0000000..f53dbe0 --- /dev/null +++ b/data/output/004315e86f1eb5ec9c3aa6ee439eedf755862371.json @@ -0,0 +1,294 @@ +{ + "file_name": "004315e86f1eb5ec9c3aa6ee439eedf755862371.txt", + "total_words": 628, + "top_n_words": [ + { + "word": "the", + "count": 38 + }, + { + "word": "in", + "count": 23 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "commander", + "count": 15 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "hms", + "count": 13 + }, + { + "word": "she", + "count": 13 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "west", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Jennifer Newton .", + "length": 17 + }, + { + "text": "'There are drawbacks though.", + "length": 28 + }, + { + "text": "Commander West recently described .", + "length": 35 + }, + { + "text": "Years at sea probably explains why I'm single.", + "length": 46 + }, + { + "text": "Andrew Whitlum but the pair separated in 2006.", + "length": 46 + }, + { + "text": "But every person in the military makes sacrifices.", + "length": 50 + }, + { 
+ "text": "Lots of women in the services have challenging roles.", + "length": 53 + }, + { + "text": "It's just that I happen to be newsworthy at the moment.", + "length": 55 + }, + { + "text": "herself as ‘single’, having been married to former Royal Navy pilot .", + "length": 73 + }, + { + "text": "Divorced Commander West, 42, took charge of Type 23 frigate HMS Portland in May 2012.", + "length": 85 + }, + { + "text": "In a statement a spokesman said no details would be given of the inquiry or its outcome.", + "length": 88 + }, + { + "text": "It has been confirmed that Commander Sarah West has been 'removed from command' from the ship.", + "length": 94 + }, + { + "text": "She was promoted to commander in January 2012 and assumed command of HMS Portland in May that year.", + "length": 99 + }, + { + "text": "Her time on HMS Pembroke included eight and a half months deployed on operations in the Arabian Gulf.", + "length": 101 + }, + { + "text": "She told the Daily Mirror: 'I'm really proud to be the first woman but I'm not reinventing the wheel.", + "length": 101 + }, + { + "text": "The ship recently returned to the UK following a seven month deployment in the Atlantic and Caribbean .", + "length": 103 + }, + { + "text": "Lieutenant Commander Gray is Portland’s operations officer, in charge of weapons and defence systems.", + "length": 103 + }, + { + "text": "However, a spokesman added that she would remain in the service and would be re-appointed to another post.", + "length": 106 + }, + { + "text": "'This is an internal matter between the individual and her senior officers and we will not give further details of the removal.", + "length": 127 + }, + { + "text": "The Royal Navy has confirmed that Commander Sarah West has been removed from command of HMS Portland amid claims of an affair .", + "length": 127 + }, + { + "text": "However, he added: 'We can confirm that Commander Sarah West, Commanding Officer of HMS Portland, has been removed from Command.", + 
"length": 128 + }, + { + "text": "Commander West, who was captain of HMS Portland, pictured, was the first woman to be in charge of a frontline Royal Navy warship .", + "length": 130 + }, + { + "text": "The first female captain of a frontline Royal Navy warship has been removed from her post amid claims she had an affair with another shipmate.", + "length": 142 + }, + { + "text": "Commander West, left, left the vessel last month following claims that she had a relationship with Lieutenant Commander Richard Gray, pictured right .", + "length": 150 + }, + { + "text": "'Commander West will continue to serve in the Royal Navy and she will be reappointed to a post where her skills and experience can be used to best effect.", + "length": 154 + }, + { + "text": "Selected for sea command in 2008, she commanded minesweepers HMS Ramsey, HMS Penzance, HMS Pembroke and HMS Shoreham between April 2009 and December 2011.", + "length": 154 + }, + { + "text": "' Commander West, who was born and raised in Lincolnshire, studied maths at the University of Hertfordshire before joining Britannia Royal Naval College in September 1995.", + "length": 171 + }, + { + "text": "At the time the Royal Navy said it would conduct an inquiry into whether she breached the armed forces' code of conduct, which governs personal relationships within the military.", + "length": 178 + }, + { + "text": "However, she left the vessel last month following allegations that she had a relationship with newly-wed Lieutenant Commander Richard Gray, who married his wife Melissa in December.", + "length": 181 + }, + { + "text": "She was selected as a small ship navigator and joined HMS Cottesmore in 1997 and subsequent appointments included Officer of the Watch of HMS Sheffield and Navigating Officer of HMS Somerset.", + "length": 191 + }, + { + "text": "' The spokesman added that Commander West's second-in-command has taken over the running of the ship, which returned to the UK on Saturday after a seven-month 
deployment in the Atlantic and Caribbean.", + "length": 200 + }, + { + "text": "In 2007 she joined the Permanent Joint Headquarters and was responsible for co-ordinating the UK contribution to operations in the Balkans, which included the period that saw Kosovo’s declaration of independence.", + "length": 214 + }, + { + "text": "After her appointment two years ago Commander West described it as the greatest achievement of her career, but in an interview earlier this year she explained how work commitments made it difficult to have a relationship.", + "length": 221 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5754134654998779 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:26.917774266Z", + "first_section_created": "2025-12-23T09:34:26.918153581Z", + "last_section_published": "2025-12-23T09:34:26.918358889Z", + "all_results_received": "2025-12-23T09:34:26.980080577Z", + "output_generated": "2025-12-23T09:34:26.980261884Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:26.918153581Z", + "publish_time": "2025-12-23T09:34:26.918358889Z", + "first_worker_start": "2025-12-23T09:34:26.918898111Z", + "last_worker_end": "2025-12-23T09:34:26.979145Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:26.918821108Z", + "start_time": "2025-12-23T09:34:26.918898111Z", + "end_time": "2025-12-23T09:34:26.918990215Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:26.919043Z", + "start_time": "2025-12-23T09:34:26.919189Z", + "end_time": "2025-12-23T09:34:26.979145Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + 
"receive_time": "2025-12-23T09:34:26.91888501Z", + "start_time": "2025-12-23T09:34:26.918949813Z", + "end_time": "2025-12-23T09:34:26.919063918Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:26.918843509Z", + "start_time": "2025-12-23T09:34:26.918913012Z", + "end_time": "2025-12-23T09:34:26.918944313Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3786, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/004317d0580a86bc3c0a10d9932dcad75818e349.json b/data/output/004317d0580a86bc3c0a10d9932dcad75818e349.json new file mode 100644 index 0000000..9f4daa8 --- /dev/null +++ b/data/output/004317d0580a86bc3c0a10d9932dcad75818e349.json @@ -0,0 +1,290 @@ +{ + "file_name": 
"004317d0580a86bc3c0a10d9932dcad75818e349.txt", + "total_words": 463, + "top_n_words": [ + { + "word": "the", + "count": 30 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "gay", + "count": 12 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "court", + "count": 9 + }, + { + "word": "to", + "count": 9 + }, + { + "word": "by", + "count": 7 + }, + { + "word": "sex", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "Gay .", + "length": 5 + }, + { + "text": "Updated: .", + "length": 10 + }, + { + "text": "Conservative .", + "length": 14 + }, + { + "text": "Simon Tomlinson .", + "length": 17 + }, + { + "text": "conservative country.", + "length": 21 + }, + { + "text": "18:29 GMT, 23 February 2012 .", + "length": 29 + }, + { + "text": "But the Home Ministry was forced to .", + "length": 37 + }, + { + "text": "of the same gender punishable by up to 10 years in prison.", + "length": 58 + }, + { + "text": "people - including conservative groups and gay rights activists.", + "length": 64 + }, + { + "text": "followed by courts across the nation and end widespread police .", + "length": 64 + }, + { + "text": "rights activists celebrated the 2009 ruling, hoping it would be .", + "length": 65 + }, + { + "text": "and that the ministry 'has not taken any position on homosexuality'.", + "length": 68 + }, + { + "text": "groups have asked the top court to overturn the lower court's order, .", + "length": 70 + }, + { + "text": "in 2009 which struck down a colonial-era law making sex between people .", + "length": 72 + }, + { + "text": "and Supreme Court judges are currently hearing opinions from a range of .", + "length": 73 + }, + { + "text": "statement said that the Cabinet had decided not to challenge the ruling .", + "length": 73 + }, + { + "text": "harassment and lead to gradual acceptance for 
homosexuals in this deeply .", + "length": 74 + }, + { + "text": "issue a clarification saying it accepts a ruling by the Delhi High Court .", + "length": 74 + }, + { + "text": "While actual criminal prosecutions were few, the law frequently was used to harass people.", + "length": 90 + }, + { + "text": "Chaos: Homosexual rights supporters take part in the Gay Pride Parade in New Delhi in 2010.", + "length": 91 + }, + { + "text": "Many bars have gay nights, and some high-profile Bollywood films have dealt with gay issues.", + "length": 92 + }, + { + "text": "The last two years have also seen large gay pride parades in New Delhi and other big cities, including Mumbai and Kolkata.", + "length": 122 + }, + { + "text": "Over the last decade, homosexuals have slowly gained a degree of acceptance in some parts of India, especially its big cities.", + "length": 126 + }, + { + "text": "Many will be left confused by today's events in which the government appeared to announce differing views on laws allowing gay sex .", + "length": 132 + }, + { + "text": "Still, being gay remains deeply taboo in most of the country, and many homosexuals hide their sexual orientation from friends and relatives.", + "length": 140 + }, + { + "text": "The high court had said that treating consensual gay sex between adults as a crime was a violation of fundamental rights protected by India's constitution.", + "length": 155 + }, + { + "text": "Sex between people of the same gender had been illegal in India since the 1860s when a British colonial law classified it as 'against the order of nature'.", + "length": 155 + }, + { + "text": "Television channels reported that the lawyer may have gotten confused and read out the wrong statement in front of the Supreme Court which reflected an old government opinion.", + "length": 175 + }, + { + "text": "Conflicting views: Additional Solicitor General PP Malhotra initially told the Indian Supreme Court (above) that gay sex was 'highly immoral' but was 
quickly contradicted by the government .", + "length": 190 + }, + { + "text": "A sensitive debate over gay sex in India was thrown into farce today after a government lawyer urged the Supreme Court to ban it - only for the Home Ministry to quickly issue a contradictory statement hours later.", + "length": 213 + }, + { + "text": "Additional Solicitor General PP Malhotra told the court that 'gay sex is highly immoral and against social order and there is high chance of spreading of diseases through such acts,' the Press Trust of India reported.", + "length": 217 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7929060459136963 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:27.419147471Z", + "first_section_created": "2025-12-23T09:34:27.419436983Z", + "last_section_published": "2025-12-23T09:34:27.419591189Z", + "all_results_received": "2025-12-23T09:34:27.479744813Z", + "output_generated": "2025-12-23T09:34:27.47991082Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:27.419436983Z", + "publish_time": "2025-12-23T09:34:27.419591189Z", + "first_worker_start": "2025-12-23T09:34:27.420125211Z", + "last_worker_end": "2025-12-23T09:34:27.478804Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:27.420134611Z", + "start_time": "2025-12-23T09:34:27.420178013Z", + "end_time": "2025-12-23T09:34:27.420240515Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:27.420309Z", + "start_time": "2025-12-23T09:34:27.420448Z", + "end_time": "2025-12-23T09:34:27.478804Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + 
"receive_time": "2025-12-23T09:34:27.420065008Z", + "start_time": "2025-12-23T09:34:27.420125211Z", + "end_time": "2025-12-23T09:34:27.420187913Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:27.420075809Z", + "start_time": "2025-12-23T09:34:27.420140911Z", + "end_time": "2025-12-23T09:34:27.420170512Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2827, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git a/data/output/0043419182bdb005875dd3da575ebe38570907e8.json b/data/output/0043419182bdb005875dd3da575ebe38570907e8.json new file mode 100644 index 0000000..956f07e --- /dev/null +++ b/data/output/0043419182bdb005875dd3da575ebe38570907e8.json @@ -0,0 +1,274 @@ +{ + "file_name": 
"0043419182bdb005875dd3da575ebe38570907e8.txt", + "total_words": 614, + "top_n_words": [ + { + "word": "the", + "count": 31 + }, + { + "word": "to", + "count": 28 + }, + { + "word": "is", + "count": 15 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "renzi", + "count": 14 + }, + { + "word": "s", + "count": 14 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "he", + "count": 11 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "and", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "Above all, he must find support.", + "length": 32 + }, + { + "text": "''So he will have to act quickly.", + "length": 33 + }, + { + "text": "So this is going to be a difficulty.", + "length": 36 + }, + { + "text": "\" Something, being the optimal word.", + "length": 36 + }, + { + "text": "WATCH MORE: The future of Europe's economy .", + "length": 44 + }, + { + "text": "But he'll need more than stamina to succeed.", + "length": 44 + }, + { + "text": "READ MORE: Is Matteo Renzi ready to be Italy's PM?", + "length": 50 + }, + { + "text": "'' Yet if anything, Renzi is a long distance player.", + "length": 52 + }, + { + "text": "''But I assure you,'' he said, ''I will give this commitment all the energy I have.", + "length": 83 + }, + { + "text": "''He is relatively little known compared to his two predecessors,'' says Scarpetta.", + "length": 83 + }, + { + "text": "''What Renzi's done is gutsy,'' says Giuseppe Ragusa of the Luiss Guido Carli University in Rome.", + "length": 97 + }, + { + "text": "''But he is not going to have the public's support; he doesn't have the votes from the electoral poll.", + "length": 102 + }, + { + "text": "Vincenzo Scarpetta of London-based think tank Open Europe says Renzi will have to prove himself on the international stage.", + "length": 123 + }, + { + "text": "How he thinks he will manage to garner more support than career politicians, like his predecessor Enrico Letta, is as yet unclear.", + "length": 130 + }, + { + 
"text": "Still, top of the list for Renzi, will be moves to create the kind of political stability where such measures can actually take hold.", + "length": 133 + }, + { + "text": "Instead Ragusa says Italy is hoping that by virtue of his youth and dynamism Renzi will have the energy ''to do something very quickly.", + "length": 135 + }, + { + "text": "'' Commitment is something this former boy scout is known for and at less than half of the age of Silvio Berlusconi, Renzi certainly has energy.", + "length": 144 + }, + { + "text": "Then again, the 39-year-old's backers say this football-fan Mayor of Florence is precisely the breath of fresh air needed in Rome's stuffy halls of power.", + "length": 154 + }, + { + "text": "Italy has been crying out for a plausible, long-term economic agenda for years, leaving the country wholly unprepared for the economic slump of recent years.", + "length": 157 + }, + { + "text": "First there's a two trillion-euro debt pile to shrink, record unemployment, crippling and antiquated labor laws not to mention stifling business and payroll taxes.", + "length": 163 + }, + { + "text": "A marathon runner and keen sportsman, Renzi already has an eye on the distant horizon -- saying he wishes to see this term through until the next election in 2018.", + "length": 163 + }, + { + "text": "Often described as his country's answer to Tony Blair, Renzi is good at talking the big picture, which is probably just as well because Italy's problems aren't small.", + "length": 166 + }, + { + "text": "This means ploughing on with plans to reform the parliamentary system in a move which is likely to cost the country its upper house -- or senate -- in its current form.", + "length": 168 + }, + { + "text": "What's more: Renzi had initially vowed only to seek the top job through the ballot box and not a leadership contest, meaning some are skeptical about what he stands for.", + "length": 169 + }, + { + "text": "London (CNN) -- Il Rottomatore -- or \"the 
demolition man\" -- is how Italy's incoming prime minister has come to be known, thanks in part to his pugnacious approach to politics.", + "length": 176 + }, + { + "text": "Addressing reporters after being asked to form a government by Italy's President -- as protocol dictates -- Renzi said it would likely take a few days to get his key people in place.", + "length": 182 + }, + { + "text": "Neither an MP nor an elected premier, Renzi has managed to wrest control of the party's leadership by promising to smash the gridlocked reform process and shift its axis to the center.", + "length": 184 + }, + { + "text": "Matteo Renzi's nickname hardly bodes well for drumming up support in one of the most fractious governing systems on the planet, one which has speared all but one of its governments since World War II.", + "length": 200 + }, + { + "text": "However, Renzi may be on a collision course with Brussels after suggesting the EU give his nation some leeway to breach its 3% limit on the budget deficit in order to support a recent return to growth.", + "length": 201 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5779160857200623 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:27.92036377Z", + "first_section_created": "2025-12-23T09:34:27.92084819Z", + "last_section_published": "2025-12-23T09:34:27.921044698Z", + "all_results_received": "2025-12-23T09:34:27.987383371Z", + "output_generated": "2025-12-23T09:34:27.987569079Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:27.92084819Z", + "publish_time": "2025-12-23T09:34:27.921044698Z", + "first_worker_start": "2025-12-23T09:34:27.92159952Z", + "last_worker_end": "2025-12-23T09:34:27.986473Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": 
"sentencesort", + "receive_time": "2025-12-23T09:34:27.921570319Z", + "start_time": "2025-12-23T09:34:27.921653422Z", + "end_time": "2025-12-23T09:34:27.921740326Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:27.921807Z", + "start_time": "2025-12-23T09:34:27.921956Z", + "end_time": "2025-12-23T09:34:27.986473Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:27.921547518Z", + "start_time": "2025-12-23T09:34:27.921619421Z", + "end_time": "2025-12-23T09:34:27.921699024Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:27.921530817Z", + "start_time": "2025-12-23T09:34:27.92159952Z", + "end_time": "2025-12-23T09:34:27.921729125Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3465, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/0043531ac872dbc37553dcb4e554ecc2ae254dcb.json b/data/output/0043531ac872dbc37553dcb4e554ecc2ae254dcb.json new file mode 100644 index 0000000..5410043 --- /dev/null +++ b/data/output/0043531ac872dbc37553dcb4e554ecc2ae254dcb.json @@ -0,0 +1,864 @@ +{ + "file_name": "0043531ac872dbc37553dcb4e554ecc2ae254dcb.txt", + "total_words": 1501, + "top_n_words": [ + { + "word": "the", + "count": 95 + }, + { + "word": "and", + "count": 55 + }, + { + "word": "a", + "count": 36 + }, + { + "word": "is", + "count": 35 + }, + { + "word": "in", + "count": 34 + }, + { + "word": "of", + "count": 33 + }, + { + "word": "to", + "count": 32 + }, + { + "word": "he", + "count": 27 + }, + { + "word": "his", + "count": 26 + }, + { + "word": "has", + "count": 24 + } + ], + "sorted_sentences": [ + { + "text": "Is .", + "length": 4 + }, + { + "text": "He .", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "To .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "Like .", + "length": 6 + }, + { + "text": "party.", + "length": 6 + }, + { + "text": "Note .", + "length": 6 + }, + { + "text": "Monaco.", + "length": 7 + }, + { + "text": "leader.", + "length": 7 + }, + { + "text": "passage.", + "length": 8 + }, + { + "text": "STRIKER .", + "length": 9 + }, + { + "text": "business.", + "length": 9 + }, + { + "text": "VERDICT: .", + "length": 10 + }, + { + "text": "[caption .", + "length": 10 + }, + { + "text": "VERDICT: .", + "length": 10 + }, + { + "text": "VERDICT: .", + "length": 10 + }, + { + "text": "VERDICT: .", + "length": 10 + }, + { + "text": "VERDICT: .", + "length": 10 + }, + { + "text": "VERDICT: .", + "length": 10 + }, + { + "text": "VERDICT: .", + "length": 10 + }, + { + "text": "Argentina .", + 
"length": 11 + }, + { + "text": "We hope so.", + "length": 11 + }, + { + "text": "LEFT BACK .", + "length": 11 + }, + { + "text": "out on top?", + "length": 11 + }, + { + "text": "Combative .", + "length": 11 + }, + { + "text": "Mesut Ozil .", + "length": 12 + }, + { + "text": "RIGHT BACK .", + "length": 12 + }, + { + "text": "Toni Kroos .", + "length": 12 + }, + { + "text": "Enzo Perez .", + "length": 12 + }, + { + "text": "Craig Hope .", + "length": 12 + }, + { + "text": "CENTRE-BACK .", + "length": 13 + }, + { + "text": "Marcos Rojo .", + "length": 13 + }, + { + "text": "CENTRE BACK .", + "length": 13 + }, + { + "text": "game-changing.", + "length": 14 + }, + { + "text": "Philipp Lahm .", + "length": 14 + }, + { + "text": "Lionel Messi .", + "length": 14 + }, + { + "text": "Sami Khedira .", + "length": 14 + }, + { + "text": "Lucas Biglia .", + "length": 14 + }, + { + "text": "Mats Hummels .", + "length": 14 + }, + { + "text": "Manuel Neuer .", + "length": 14 + }, + { + "text": "Sergio Romero .", + "length": 15 + }, + { + "text": "They are both .", + "length": 15 + }, + { + "text": "Thomas Muller .", + "length": 15 + }, + { + "text": "Ezequiel Garay .", + "length": 16 + }, + { + "text": "Miroslav Klose .", + "length": 16 + }, + { + "text": "Pablo Zabaleta .", + "length": 16 + }, + { + "text": "Jerome Boateng .", + "length": 16 + }, + { + "text": "Gonzalo Higuain .", + "length": 17 + }, + { + "text": "Benedikt Howedes .", + "length": 18 + }, + { + "text": "of Schweinsteiger.", + "length": 18 + }, + { + "text": "Ezequiel Lavezzi .", + "length": 18 + }, + { + "text": "Javier Mascherano .", + "length": 19 + }, + { + "text": "But who will come .", + "length": 19 + }, + { + "text": "Argentina Germany .", + "length": 19 + }, + { + "text": "Martin Demichelis .", + "length": 19 + }, + { + "text": "We are down to the .", + "length": 20 + }, + { + "text": "the standard he sets.", + "length": 21 + }, + { + "text": "ATTACKING MIDFIELDER .", + "length": 22 + }, + { + "text": 
"ATTACKING MIDFIELDER .", + "length": 22 + }, + { + "text": "DEFENSIVE MIDFIELDER .", + "length": 22 + }, + { + "text": "However, it hasn’t .", + "length": 22 + }, + { + "text": "Follow @@CraigHope01 .", + "length": 22 + }, + { + "text": "ATTACKING MIDFIELDER .", + "length": 22 + }, + { + "text": "DEFENSIVE MIDFIELDER .", + "length": 22 + }, + { + "text": "Bastian Schweinsteiger .", + "length": 24 + }, + { + "text": "He is dependable and a .", + "length": 24 + }, + { + "text": "would not stand a chance.", + "length": 25 + }, + { + "text": "of Howedes which wins him the nod.", + "length": 34 + }, + { + "text": "Final score: Germany 8-3 Argentina .", + "length": 36 + }, + { + "text": "Each of the above has been said of .", + "length": 36 + }, + { + "text": "Germany or Argentina crowned champion.", + "length": 38 + }, + { + "text": "and can’t dribble – sound familiar?", + "length": 39 + }, + { + "text": "Klose, Higuain is a man who trades in goals.", + "length": 44 + }, + { + "text": "healthy dose of old-school cynicism thrown in.", + "length": 46 + }, + { + "text": "who has the pick of the players on display… .", + "length": 47 + }, + { + "text": "His tackle to deny Arjen Robben at the death in .", + "length": 49 + }, + { + "text": "far away, the veteran has been solid and reliable.", + "length": 50 + }, + { + "text": "he saving one last explosion of magic for the final?", + "length": 52 + }, + { + "text": "hunger for goals has been rewarded on five occasions.", + "length": 53 + }, + { + "text": "Much more is needed if Argentina are to upset the odds.", + "length": 55 + }, + { + "text": "Scored the winner in the quarter-final defeat of France.", + "length": 56 + }, + { + "text": "He is, however, no match for the injured Angel Di Maria.", + "length": 56 + }, + { + "text": "He did well against Holland and is bright and energetic.", + "length": 56 + }, + { + "text": "VERDICT: Muller is a star, Lavezzi is something of a myth.", + "length": 58 + }, + { + "text": "He has 
come in for criticism back in Germany and rightly so.", + "length": 60 + }, + { + "text": "forwards at these finals have enjoyed coming up against him.", + "length": 60 + }, + { + "text": "For once Demichelis makes a mistake it is invariably punished.", + "length": 62 + }, + { + "text": "He breaks play, builds play and whips others into shape with .", + "length": 62 + }, + { + "text": "Romero, despite impressing, doesn’t come close to the German.", + "length": 63 + }, + { + "text": "will have to produce similar heroics if Argentina are to triumph.", + "length": 65 + }, + { + "text": "and boasts latter-stage Champions League experience with Dortmund.", + "length": 66 + }, + { + "text": "He was the star in the penalty-shootout victory over Holland and .", + "length": 66 + }, + { + "text": "Here, Sportsmail examines the likely starting XIs and determines .", + "length": 66 + }, + { + "text": "predatory in the big moments and is chosen ahead of the Argentine.", + "length": 66 + }, + { + "text": "and energetic, Zabaleta is very much the modern full-back with a .", + "length": 66 + }, + { + "text": "Khedira is a class act and brings the recent – and invaluable - .", + "length": 67 + }, + { + "text": "experience of his Champions League victory with Real Madrid to the .", + "length": 68 + }, + { + "text": "compliments well the snarl of Mascherano in the middle of the pitch.", + "length": 68 + }, + { + "text": "his credit, Romero has ignored – and subsequently silenced - the .", + "length": 68 + }, + { + "text": "The PSG forward works hard, yes, but where are the goals and assists?", + "length": 69 + }, + { + "text": "gets the nod given his athleticism and ability to recover a situation.", + "length": 70 + }, + { + "text": "to Barcelona: play him in midfield and Mascherano is the best in the .", + "length": 70 + }, + { + "text": "is nonetheless the best player on the park and without him Argentina .", + "length": 70 + }, + { + "text": "Higuain’s profligacy has almost cost 
his side but Klose has proved .", + "length": 70 + }, + { + "text": "Lazio man is understated but don’t underestimate his ability and he .", + "length": 71 + }, + { + "text": "Neuer is the best there is when it comes to the goalkeeping trade and .", + "length": 71 + }, + { + "text": "Manchester City past and present, but its former defender Boateng who .", + "length": 71 + }, + { + "text": "The Argentina man is an all-action hero whose side could not function .", + "length": 71 + }, + { + "text": "happened for him in Brazil and he has, in truth, been a disappointment.", + "length": 71 + }, + { + "text": "have looked better for his presence and, although a gaffe is never too .", + "length": 72 + }, + { + "text": "Another one which could go either way but Hummels is one cool customer .", + "length": 72 + }, + { + "text": "without his influence, and for that he is deserving of his place ahead .", + "length": 72 + }, + { + "text": "naysayers who deemed him the weak link after a season of inactivity at .", + "length": 72 + }, + { + "text": "the semi-final captured everything about his contribution; courageous, .", + "length": 72 + }, + { + "text": "Muller yet he has been the player of the tournament and his unyielding .", + "length": 72 + }, + { + "text": "admirably to their duty in Brazil but it is the experience and know-how .", + "length": 73 + }, + { + "text": "Both players would prefer to be playing at centre-back but have adapted .", + "length": 73 + }, + { + "text": "Much has been made of the scramble for his signature and now we know why.", + "length": 73 + }, + { + "text": "Benfica stopper marries grit and grizzle to his ability to play and few .", + "length": 73 + }, + { + "text": "final while Argentina have proved resilient and resourceful during their .", + "length": 74 + }, + { + "text": "Outstanding in the group stage but subdued in the knockout rounds, Messi .", + "length": 74 + }, + { + "text": "final two and this most enthralling of World Cups will 
climax with either .", + "length": 75 + }, + { + "text": "can’t tackle, isn’t a great passer, doesn’t really have a turn of foot .", + "length": 78 + }, + { + "text": "He keeps it simple but keeps the ball with it and is the heartbeat of this side.", + "length": 80 + }, + { + "text": "Experience: Schweinsteiger is playing in his sixth major tournament with Germany .", + "length": 82 + }, + { + "text": "Passion: Argentina's Marcos Rojo celebrates after the shootout win over the Dutch .", + "length": 83 + }, + { + "text": "worthy victors, Germany having coupled fortitude with flair on their march to the .", + "length": 83 + }, + { + "text": "Up top: Napoli striker Higuain will lead Argentina from the front against Germany .", + "length": 83 + }, + { + "text": "Getaway: PSG star Lavezzi escapes Georginio Wijnaldum during the nervy semi-final .", + "length": 83 + }, + { + "text": "Between the sticks: Romero's penalty heroics helped Argentina through to the final .", + "length": 84 + }, + { + "text": "Class act: Khedira controlled the game in Belo Horizonte as Germany beat Brazil 7-1 .", + "length": 85 + }, + { + "text": "Dependable: Benedikt Howedes has been solid throughout Germany's World Cup campaign .", + "length": 85 + }, + { + "text": "Roar: Ozil has the chance to answer his critics after a quiet World Cup for Germany .", + "length": 85 + }, + { + "text": "Versatile: Boateng fends off a challenge from Olivier Giroud in the quarter-final victory .", + "length": 91 + }, + { + "text": "It’s a simple science: goals win games and, to that end, Mirsolav Klose is a match-winner.", + "length": 92 + }, + { + "text": "Down in history: Miroslav Klose is now the all-time leading goalscorer in World Cup history .", + "length": 93 + }, + { + "text": "Switching up: Philipp Lahm was moved from midfield back to full-back by manager Joachim Low .", + "length": 93 + }, + { + "text": "Other end: Ezequiel Garay scores a penalty for Argentina in the semi-final win over Holland .", + 
"length": 93 + }, + { + "text": "On his watch: Toni Kroos is one German midfielder who can control the pace of the game in Rio .", + "length": 95 + }, + { + "text": "Not getting past: Manuel Neuer has been one of the outstanding goalkeepers of the competition .", + "length": 95 + }, + { + "text": "He gallops around the park but brings a classy, calming influence to the team at the same time.", + "length": 95 + }, + { + "text": "His 16 are now a World Cup record and, although he does little else around the pitch, who cares?", + "length": 96 + }, + { + "text": "Trusted: Man City defender Zabaleta has had a fantastic tournament at right-back for Argentina .", + "length": 96 + }, + { + "text": "VIDEO Scroll down for 'Beckham: Argentina will beat Germany 3-1, with Messi a winner' GOALKEEPER .", + "length": 98 + }, + { + "text": "Sliding in: City defender Demichelis did a good job marshaling Arjen Robben on Wednesday evening .", + "length": 98 + }, + { + "text": "VERDICT: Perhaps the toughest call on the park but Zabaleta shades it on his energy and aggression.", + "length": 99 + }, + { + "text": "Pace: Enzo Perez races away from Dirk Kuyt and Wesley Sneijder during the semi-final in Sao Paulo .", + "length": 99 + }, + { + "text": "Superstar: Lionel Messi finally has the chance to make himself a hero on the biggest stage of all .", + "length": 99 + }, + { + "text": "He has grown into the tournament and, given his ease in possession, fits well into the German model.", + "length": 100 + }, + { + "text": "On the chase: Muller has a chance of being named Golden Boot winner for a second straight World Cup .", + "length": 101 + }, + { + "text": "Handy: Hummels scored the only goal against France in the quarters and was in fine form against Brazil .", + "length": 104 + }, + { + "text": "VERDICT: Even if Ozil had impressed – which he hasn’t – Messi would still win this one hands down.", + "length": 104 + }, + { + "text": "Chance: Lazio midfielder Biglia has only started two of 
Argentina's six World Cup games this tournament .", + "length": 105 + }, + { + "text": "He is the lazy link in this fantastic German side and has been a passenger during their journey to the final.", + "length": 109 + }, + { + "text": "VERDICT: ‘Kroos Control’ is the ultimate midfield player and will again seek to dictate affairs in the final.", + "length": 113 + }, + { + "text": "The Bayern Wall is a solid foundation upon which to build a team and he has been an impassable presence in Brazil.", + "length": 114 + }, + { + "text": "He has the personality for the biggest stage and will be desperate to add a World Cup to his enviable haul of trophies.", + "length": 119 + }, + { + "text": "He turned in one of the performances of the finals against Brazil, combining graft and craft and chipping in with a goal.", + "length": 121 + }, + { + "text": "There were doubts about the Sporting defender given his preference for a more central posting but he has excelled at left-back.", + "length": 127 + }, + { + "text": "He is seldom exposed despite being more accustomed to a central role and is used to being charged with shackling the likes of Messi and Higuain.", + "length": 144 + }, + { + "text": "He’s on the shortlist for FIFA’s Golden Ball prize and that serves to highlight his assured and classy presence at the heart of the German backline.", + "length": 152 + }, + { + "text": "Even from full-back he is able to influence affairs with his intelligence and ability to keep the ball and is rarely shown up on a defensive front either.", + "length": 154 + }, + { + "text": "Be it five yards or fifty, left boot or right, Kroos locates his team-mates with unerring accuracy and – as his double against Brazil proved – he can be clinical in front of goal.", + "length": 183 + }, + { + "text": "The world’s number one No1 has proved his standing as thus at the finals and – like Dino Zoff, Gianluigi Buffon and Iker Casillas – is deserving of a World Cup winner’s medal.", + "length": 183 + } 
+ ], + "sentiment": { + "sentiment": "positive", + "score": 0.4352085590362549 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:28.421825079Z", + "first_section_created": "2025-12-23T09:34:28.422197894Z", + "last_section_published": "2025-12-23T09:34:28.422816419Z", + "all_results_received": "2025-12-23T09:34:28.509328206Z", + "output_generated": "2025-12-23T09:34:28.509656719Z", + "total_processing_time_ms": 87, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 86, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:28.422197894Z", + "publish_time": "2025-12-23T09:34:28.422520507Z", + "first_worker_start": "2025-12-23T09:34:28.422965925Z", + "last_worker_end": "2025-12-23T09:34:28.505144Z", + "total_journey_time_ms": 82, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:28.423039128Z", + "start_time": "2025-12-23T09:34:28.423100731Z", + "end_time": "2025-12-23T09:34:28.423203335Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:28.423158Z", + "start_time": "2025-12-23T09:34:28.423313Z", + "end_time": "2025-12-23T09:34:28.505144Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 81 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:28.422897722Z", + "start_time": "2025-12-23T09:34:28.422965925Z", + "end_time": "2025-12-23T09:34:28.423116631Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:28.422931724Z", + "start_time": "2025-12-23T09:34:28.423020727Z", + "end_time": "2025-12-23T09:34:28.42307603Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:28.42259701Z", + "publish_time": 
"2025-12-23T09:34:28.422816419Z", + "first_worker_start": "2025-12-23T09:34:28.423224236Z", + "last_worker_end": "2025-12-23T09:34:28.508442Z", + "total_journey_time_ms": 85, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:28.423214535Z", + "start_time": "2025-12-23T09:34:28.423259237Z", + "end_time": "2025-12-23T09:34:28.42334474Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:28.423405Z", + "start_time": "2025-12-23T09:34:28.423546Z", + "end_time": "2025-12-23T09:34:28.508442Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 84 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:28.423254837Z", + "start_time": "2025-12-23T09:34:28.42334134Z", + "end_time": "2025-12-23T09:34:28.423438844Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:28.423167033Z", + "start_time": "2025-12-23T09:34:28.423224236Z", + "end_time": "2025-12-23T09:34:28.423258937Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 165, + "min_processing_ms": 81, + "max_processing_ms": 84, + "avg_processing_ms": 82, + "median_processing_ms": 84, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4461, + "slowest_section_id": 1, + "slowest_section_time_ms": 85 + } +} diff --git a/data/output/0043612320aaede50bf515eacc66c7daeee9a21b.json b/data/output/0043612320aaede50bf515eacc66c7daeee9a21b.json new file mode 100644 index 0000000..8e225d6 --- /dev/null +++ b/data/output/0043612320aaede50bf515eacc66c7daeee9a21b.json @@ -0,0 +1,242 @@ +{ + "file_name": "0043612320aaede50bf515eacc66c7daeee9a21b.txt", + "total_words": 355, + "top_n_words": [ + { + "word": "the", + "count": 28 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "al", + "count": 10 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "a", + "count": 8 + }, + { + "word": "shabaab", + "count": 8 + }, + { + "word": "to", + "count": 7 + }, + { + "word": "said", + "count": 6 + }, + { + "word": "forces", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "Col.", + "length": 4 + }, + { + "text": "Col.", + "length": 4 + }, + { + "text": "Paddy Ankunda said.", + "length": 19 + }, + { + "text": "government in March 2008.", + "length": 25 + }, + { + "text": "Paddy Nkunda in a statement last week.", + "length": 38 + }, + { + "text": "Al-Shabaab was designated as a foreign terrorist organization by the U.", + "length": 71 + }, + { + "text": "Other Al-Shabaab attacks last week led to the deaths of at least 10 civilians.", + "length": 78 + }, + { + "text": "\"Our joint operations have gone extremely well today and over the weekend,\" AMISOM spokesman Lt.", + "length": 96 + }, + { + "text": "\"Casualties have been thankfully very low on our side, with just one killed and six 
minor injuries.", + "length": 99 + }, + { + "text": "Federal and African Union forces in the impoverished and chaotic nation have battled the group for years.", + "length": 105 + }, + { + "text": "The group is waging a war against Somalia's government to implement a stricter form of Islamic law, or Sharia.", + "length": 110 + }, + { + "text": "\" Last week Al-Shabaab claimed responsibility for a suicide truck bombing in the heart of Mogadishu that left dozens dead.", + "length": 122 + }, + { + "text": "\" \"It has been a big achievement to remove Al-Shabaab from the city, and put an end to the fighting that disrupted so many lives.", + "length": 129 + }, + { + "text": "But the challenge is now to protect civilians from the sort of terror attack we saw last week, as they attempt to rebuild their lives.", + "length": 134 + }, + { + "text": "Forces have pushed Al-Shabaab outside most of Mogadishu, but the group is still a major threat, said African Union forces spokesman Lt.", + "length": 135 + }, + { + "text": "Many analysts believe Al-Shabaab has been severely weakened by AMISOM, targeted strikes against foreign members and the weakening of al Qaeda.", + "length": 142 + }, + { + "text": "The outer north and eastern fringes of the city must still be cleared, but key ground and buildings are no longer under the control of the extremists.", + "length": 150 + }, + { + "text": "Military forces have managed to take the remaining strongholds of al Qaeda affiliate Al-Shabaab in the far northeast of the Somali capital, Mogadishu, the military said.", + "length": 169 + }, + { + "text": "Al-Shabaab said in August that it was withdrawing from Mogadishu, and the Transitional Federal Government, backed by African Union peacekeepers, now control most districts of the capital city, the United Nations office said.", + "length": 224 + }, + { + "text": "\"In effect, operations will now focus on the environs of the city and policing within the liberated areas,\" the African Union 
Mission in Somalia (AMISOM) said in a statement, adding that its troops worked with Transitional Federal Government forces.", + "length": 249 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.43878373503685 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:28.923541398Z", + "first_section_created": "2025-12-23T09:34:28.923858411Z", + "last_section_published": "2025-12-23T09:34:28.924045419Z", + "all_results_received": "2025-12-23T09:34:28.983939233Z", + "output_generated": "2025-12-23T09:34:28.984095139Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:28.923858411Z", + "publish_time": "2025-12-23T09:34:28.924045419Z", + "first_worker_start": "2025-12-23T09:34:28.924557139Z", + "last_worker_end": "2025-12-23T09:34:28.983052Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:28.92457354Z", + "start_time": "2025-12-23T09:34:28.924631642Z", + "end_time": "2025-12-23T09:34:28.924682244Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:28.924739Z", + "start_time": "2025-12-23T09:34:28.92488Z", + "end_time": "2025-12-23T09:34:28.983052Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:28.924557539Z", + "start_time": "2025-12-23T09:34:28.924628842Z", + "end_time": "2025-12-23T09:34:28.924689045Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:28.924495437Z", + "start_time": "2025-12-23T09:34:28.924557139Z", + "end_time": "2025-12-23T09:34:28.92457284Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2125, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git a/data/output/00436b4047dc31c9a8385b4846c9650f90fc80c7.json b/data/output/00436b4047dc31c9a8385b4846c9650f90fc80c7.json new file mode 100644 index 0000000..5c4b312 --- /dev/null +++ b/data/output/00436b4047dc31c9a8385b4846c9650f90fc80c7.json @@ -0,0 +1,330 @@ +{ + "file_name": "00436b4047dc31c9a8385b4846c9650f90fc80c7.txt", + "total_words": 768, + "top_n_words": [ + { + "word": "the", + "count": 44 + }, + { + "word": "in", + "count": 24 + }, + { + "word": "of", + "count": 24 + }, + { + "word": "a", + "count": 20 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "s", + "count": 12 + }, + { + "word": "abu", + "count": 11 + }, + { + "word": "philippines", + "count": 11 + }, + { + "word": "to", + "count": 11 + }, + { + "word": 
"militants", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "' The U.", + "length": 8 + }, + { + "text": "Around 600 U.", + "length": 13 + }, + { + "text": "commando attack four months later.", + "length": 34 + }, + { + "text": "-backed air strike in the Philippines, military chiefs said.", + "length": 60 + }, + { + "text": "had offered a $5million reward for the capture of Marwan, a U.", + "length": 62 + }, + { + "text": "They are still considered a key threat to the regional security.", + "length": 64 + }, + { + "text": "'This is a deliberate, fully planned attack coming from our forces.", + "length": 67 + }, + { + "text": "The military estimates the strength of Abu Sayyaf militants at about 400.", + "length": 73 + }, + { + "text": "Operation: Colonel Marcelo Burgos displays a picture of bin Hir, known as Marwan.", + "length": 81 + }, + { + "text": "The hostages separately regained their freedom months later, reportedly after ransom payments.", + "length": 94 + }, + { + "text": "South-east Asia's most-wanted terrorist and two other senior militants were killed today in a U.", + "length": 96 + }, + { + "text": "Maj Gen Coballes said: 'Our report is there were at least 15 killed, including their three leadership.", + "length": 102 + }, + { + "text": "Two other militants, Umbra Jumdail (top row, second right) and Abdullah Ali were also apparently killed .", + "length": 105 + }, + { + "text": "Campaign: A wanted poster released by the Philippine military shows bin Hir on the middle row, left side.", + "length": 105 + }, + { + "text": "Bombs were dropped on the forest hideout, but the bodies of the three terrorists have not been recovered .", + "length": 106 + }, + { + "text": "Target: 
Abu Pula, also known as Dr Abu, was the leader of Abu Sayyaf and the militant stronghold was run by his group .", + "length": 119 + }, + { + "text": "Jumdail, also known as Dr Abu, had eluded troops in numerous offensives and emerged as a key figure in the radical movement.", + "length": 124 + }, + { + "text": "-trained engineer accused of involvement in a number of deadly bombings in the Philippines and in the training of new militants.", + "length": 128 + }, + { + "text": "American counter-terrorism troops have helped ill-equipped Filipino troops track Marwan for years using satellite and drone surveillance.", + "length": 137 + }, + { + "text": "special forces have been deployed in the southern Philippines since 2002, providing a crucial support for its counter-terrorism operations.", + "length": 139 + }, + { + "text": "Abu Sayyaf is behind numerous ransom kidnappings, bomb attacks and beheadings which have terrorised the Philippines for more than two decades.", + "length": 142 + }, + { + "text": "They are believed to be holding a former Australian soldier who was kidnapped before Christmas as well as a Malaysian, a Japanese and an Indian.", + "length": 144 + }, + { + "text": "-backed Philippines offensives have been credited for the capture and killing of hundreds of Abu Sayyaf fighters and most top leaders since the 1990s.", + "length": 150 + }, + { + "text": "Marwan's death would also be a huge blow to Abu Sayyaf's ability to recover from years of battle setbacks through fundraising and training of new militants.", + "length": 156 + }, + { + "text": "The leader of the Philippines-based Abu Sayyaf militants, Umbra Jumdail, and Singaporean leader in Jemaah Islamiyah, Abdullah Ali, were also apparently killed.", + "length": 159 + }, + { + "text": "Major General Noel Coballes said no one was captured in the strike and some militants escaped, before returning later to retrieve the bodies of those who died.", + "length": 159 + }, + { + "text": "Most wanted: Zulkifli 
bin Abdul Hir, leader of an al Qaeda linked terror group in Asia, has apparently been killed in an air strike on a southern Philippines island .", + "length": 166 + }, + { + "text": "Patek is believed to have travelled back to Indonesia and then onward to Pakistan, leaving Marwan to take charge in the southern Philippines, military officials said.", + "length": 166 + }, + { + "text": "Most recently, all three of the militant leaders were among the prime suspects in the kidnappings of three Red Cross workers from Switzerland, Italy and the Philippines in 2009.", + "length": 177 + }, + { + "text": "During today's attack, the Philippines air force dropped four bombs weighing 500lb each from two planes, said Major General Jose Villarete, head of the 3rd Air Division based in Zamboanga city.", + "length": 193 + }, + { + "text": "Malaysian Zulkifi bin Hir, known as Marwan, a top leader of the al Qaeda-linked Jemaah Islamiyah terror network was apparently killed in a dawn strike on a militant stronghold on a southern island.", + "length": 197 + }, + { + "text": "Around 30 militants were in the camp near Parang town on Jolo Island - the Abu Sayyaf stronghold which they share with Indonesian-based Jemaah Islamiyah - when it was bombarded by two OV10 aircraft.", + "length": 198 + }, + { + "text": "Yesterday, gunmen on nearby Tawi-Tawi island province snatched Dutch and Swiss tourists and officials said they were attempting to move them to Jolo in an impoverished Muslim region 590 miles south of Manila.", + "length": 208 + }, + { + "text": "Patek and Marwan collaborated with Abu Sayyaf in training militants in bomb-making skills, seeking funding locally and abroad and plotting attacks, including against American troops in the southern Philippines.", + "length": 210 + }, + { + "text": "The bodies of the three men have not yet been recovered from the heavily-forested mountain camp because the area remains under the control of another rebel group - the Moro National Liberation 
Front - so their deaths have not been officially confirmed.", + "length": 252 + }, + { + "text": "If confirmed, Marwan's death would mark the most important success against regional terror network Jemaah Islamiyah since the January 2011 arrest of Indonesian suspect Umar Patek in Pakistan's garrison town of Abottabad, where Osama bin Laden was killed in a U.", + "length": 261 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7083154916763306 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:29.424803299Z", + "first_section_created": "2025-12-23T09:34:29.426329561Z", + "last_section_published": "2025-12-23T09:34:29.42654477Z", + "all_results_received": "2025-12-23T09:34:29.491726496Z", + "output_generated": "2025-12-23T09:34:29.491919604Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:29.426329561Z", + "publish_time": "2025-12-23T09:34:29.42654477Z", + "first_worker_start": "2025-12-23T09:34:29.427002988Z", + "last_worker_end": "2025-12-23T09:34:29.49083Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:29.426949186Z", + "start_time": "2025-12-23T09:34:29.427023889Z", + "end_time": "2025-12-23T09:34:29.427112093Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:29.427253Z", + "start_time": "2025-12-23T09:34:29.427405Z", + "end_time": "2025-12-23T09:34:29.49083Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:29.426956086Z", + "start_time": "2025-12-23T09:34:29.427002988Z", + "end_time": "2025-12-23T09:34:29.427105992Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 
0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:29.426940686Z", + "start_time": "2025-12-23T09:34:29.427012388Z", + "end_time": "2025-12-23T09:34:29.42704689Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4701, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/00444dd0ca369fa5b5c1e78721a9c519a1dfb8bf.json b/data/output/00444dd0ca369fa5b5c1e78721a9c519a1dfb8bf.json new file mode 100644 index 0000000..36e4913 --- /dev/null +++ b/data/output/00444dd0ca369fa5b5c1e78721a9c519a1dfb8bf.json @@ -0,0 +1,242 @@ +{ + "file_name": "00444dd0ca369fa5b5c1e78721a9c519a1dfb8bf.txt", + "total_words": 493, + "top_n_words": [ + { + "word": "the", + "count": 27 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "and", + "count": 
11 + }, + { + "word": "is", + "count": 11 + }, + { + "word": "for", + "count": 10 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "tattoo", + "count": 10 + }, + { + "word": "communities", + "count": 7 + }, + { + "word": "it", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "He said: 'It is difficult to see how this venture can be justified.", + "length": 67 + }, + { + "text": "Jobless people are being offered a course on how to 'design your own tattoo' - paid for by taxpayers.", + "length": 101 + }, + { + "text": "The six-hour course is part of a publicly-funded Communities First scheme to help get people back to work .", + "length": 107 + }, + { + "text": "' A spokesman for Bridgend council said: 'The \"Design your own tattoo\" session should not be taken at face value.", + "length": 113 + }, + { + "text": "It is aimed to 'support people to acquire the appropriate skills to become job-ready and gain sustainable employment.", + "length": 117 + }, + { + "text": "' The tattoo sessions are being held for free at community centres in Caerau, Cornelly and Sarn areas of Bridgend later this month.", + "length": 131 + }, + { + "text": "But leading Lib-Dem Welsh Assembly member Peter Black hit out at the tattoo courses - and said it shows the job-creation scheme is a flop.", + "length": 138 + }, + { + "text": "'This important regeneration programme is not delivering what is expected of it and now we see it being reduced to fun tattoo design classes.", + "length": 141 + }, + { + "text": "' A spokesman for Communities and Tackling Poverty Minister Jeff Cuthbert hit back by dubbing the attack as 'snobbish and grossly misleading'.", + "length": 142 + }, + { + "text": "'The Government needs to revisit how they are spending this money so as to deliver the results we need in helping deprived 
communities back to work.", + "length": 148 + }, + { + "text": "The free sessions on tattoo art paid for by the Labour-run Welsh Government's anti-poverty programme was yesterday blasted as a 'waste of public money'.", + "length": 152 + }, + { + "text": "'It is carefully structured to help people develop confidence and important skills in areas such as communication, creative thinking and working with others.", + "length": 157 + }, + { + "text": "He said: 'The Work Programme publicises the fact that it has found work for young people in this growing industry - is Mr Black complaining about these jobs?", + "length": 157 + }, + { + "text": "The skin ink sessions for the unemployed are described a 'free fun interactive course that will develop your artistic skills to be able to design tattoos for yourself and others'.", + "length": 179 + }, + { + "text": "' 'The truth is he has seen one advert about one small project in one Communities First area and run to the press without first making proper enquiries about the facts of the matter.", + "length": 182 + }, + { + "text": "' The Labour Welsh Government has spent around more than £200million on the Communities First scheme over the last ten years - but did not disclose how much is spent on the tattoo course.", + "length": 188 + }, + { + "text": "Mr Black said: 'Tens of millions of pounds have been spent on Communities First over the last decade or so but what indicators that are available show that in employment, health and education terms this money has had little impact.", + "length": 231 + }, + { + "text": "A free course for the unemployed on tattoo design has been attacked as a waste of public money (pictured a tattoo artist at work at the London Tattoo Convention on model Lexy Hell) The six-hour course is being run at a series of venues around Bridgend, South Wales, as part of a publicly-funded Communities First scheme to help people get back to work.", + "length": 352 + } + ], + "sentiment": { + "sentiment": 
"negative", + "score": 0.501965343952179 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:29.927326651Z", + "first_section_created": "2025-12-23T09:34:29.927623863Z", + "last_section_published": "2025-12-23T09:34:29.927816371Z", + "all_results_received": "2025-12-23T09:34:29.989852671Z", + "output_generated": "2025-12-23T09:34:29.990042079Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:29.927623863Z", + "publish_time": "2025-12-23T09:34:29.927816371Z", + "first_worker_start": "2025-12-23T09:34:29.928356693Z", + "last_worker_end": "2025-12-23T09:34:29.988882Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:29.928319491Z", + "start_time": "2025-12-23T09:34:29.928383094Z", + "end_time": "2025-12-23T09:34:29.928444196Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:29.928486Z", + "start_time": "2025-12-23T09:34:29.928625Z", + "end_time": "2025-12-23T09:34:29.988882Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:29.928319991Z", + "start_time": "2025-12-23T09:34:29.928508699Z", + "end_time": "2025-12-23T09:34:29.928867313Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:29.928272889Z", + "start_time": "2025-12-23T09:34:29.928356693Z", + "end_time": "2025-12-23T09:34:29.928395994Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2867, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/0044698497bc738bb31b3927e6e843890cdba96f.json b/data/output/0044698497bc738bb31b3927e6e843890cdba96f.json new file mode 100644 index 0000000..cb82e0b --- /dev/null +++ b/data/output/0044698497bc738bb31b3927e6e843890cdba96f.json @@ -0,0 +1,686 @@ +{ + "file_name": "0044698497bc738bb31b3927e6e843890cdba96f.txt", + "total_words": 803, + "top_n_words": [ + { + "word": "the", + "count": 31 + }, + { + "word": "of", + "count": 22 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "his", + "count": 14 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "league", + "count": 12 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "is", + "count": 9 + }, + { + "word": "premier", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "84 .", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "87 .", + "length": 4 + }, 
+ { + "text": "84 .", + "length": 4 + }, + { + "text": "86 .", + "length": 4 + }, + { + "text": "89 .", + "length": 4 + }, + { + "text": "74 .", + "length": 4 + }, + { + "text": "77 .", + "length": 4 + }, + { + "text": "91 .", + "length": 4 + }, + { + "text": "80 .", + "length": 4 + }, + { + "text": "52 .", + "length": 4 + }, + { + "text": "90 .", + "length": 4 + }, + { + "text": "84 .", + "length": 4 + }, + { + "text": "103 .", + "length": 5 + }, + { + "text": "118 .", + "length": 5 + }, + { + "text": "111 .", + "length": 5 + }, + { + "text": "105 .", + "length": 5 + }, + { + "text": "2004 .", + "length": 6 + }, + { + "text": "1959 .", + "length": 6 + }, + { + "text": "1985 .", + "length": 6 + }, + { + "text": "2013 .", + "length": 6 + }, + { + "text": "1996 .", + "length": 6 + }, + { + "text": "2013 .", + "length": 6 + }, + { + "text": "2009 .", + "length": 6 + }, + { + "text": "1981 .", + "length": 6 + }, + { + "text": "1986 .", + "length": 6 + }, + { + "text": "1961 .", + "length": 6 + }, + { + "text": "1975 .", + "length": 6 + }, + { + "text": "2004 .", + "length": 6 + }, + { + "text": "1974 .", + "length": 6 + }, + { + "text": "2012 .", + "length": 6 + }, + { + "text": "2014 .", + "length": 6 + }, + { + "text": "1959 .", + "length": 6 + }, + { + "text": "Spurs .", + "length": 7 + }, + { + "text": "Leeds .", + "length": 7 + }, + { + "text": "35-6-9 .", + "length": 8 + }, + { + "text": "32-9-9 .", + "length": 8 + }, + { + "text": "26-8-16 .", + "length": 9 + }, + { + "text": "30-13-7 .", + "length": 9 + }, + { + "text": "Chelsea .", + "length": 9 + }, + { + "text": "Everton .", + "length": 9 + }, + { + "text": "Chelsea .", + "length": 9 + }, + { + "text": "36-10-4 .", + "length": 9 + }, + { + "text": "Arsenal .", + "length": 9 + }, + { + "text": "26-8-16 .", + "length": 9 + }, + { + "text": "22-14-14 .", + "length": 10 + }, + { + "text": "21-14-15 .", + "length": 10 + }, + { + "text": "26-11-13 .", + "length": 10 + }, + { + "text": "23-15-12 .", + "length": 10 
+ }, + { + "text": "13-13-24 .", + "length": 10 + }, + { + "text": "Man City .", + "length": 10 + }, + { + "text": "23-15-12 .", + "length": 10 + }, + { + "text": "22-18-10 .", + "length": 10 + }, + { + "text": "22-18-10 .", + "length": 10 + }, + { + "text": "20-14-16 .", + "length": 10 + }, + { + "text": "24-15-11 .", + "length": 10 + }, + { + "text": "Man City .", + "length": 10 + }, + { + "text": "Liverpool .", + "length": 11 + }, + { + "text": "Liverpool .", + "length": 11 + }, + { + "text": "Liverpool .", + "length": 11 + }, + { + "text": "Don Revie .", + "length": 11 + }, + { + "text": "Liverpool .", + "length": 11 + }, + { + "text": "Liverpool .", + "length": 11 + }, + { + "text": "Man United .", + "length": 12 + }, + { + "text": "Craig Hope .", + "length": 12 + }, + { + "text": "Man United .", + "length": 12 + }, + { + "text": "David Moyes .", + "length": 13 + }, + { + "text": "Bob Paisley .", + "length": 13 + }, + { + "text": "Division One .", + "length": 14 + }, + { + "text": "Division One .", + "length": 14 + }, + { + "text": "Division One .", + "length": 14 + }, + { + "text": "Division One .", + "length": 14 + }, + { + "text": "Brian Clough .", + "length": 14 + }, + { + "text": "Division Two .", + "length": 14 + }, + { + "text": "Bill Shankly .", + "length": 14 + }, + { + "text": "Division Two .", + "length": 14 + }, + { + "text": "Division One .", + "length": 14 + }, + { + "text": "Rafa Benitez .", + "length": 14 + }, + { + "text": "Nottm Forest .", + "length": 14 + }, + { + "text": "Division Two .", + "length": 14 + }, + { + "text": "Alex Ferguson .", + "length": 15 + }, + { + "text": "Jose Mourinho .", + "length": 15 + }, + { + "text": "Arsene Wenger .", + "length": 15 + }, + { + "text": "Jose Mourinho .", + "length": 15 + }, + { + "text": "Premier League .", + "length": 16 + }, + { + "text": "Kenny Dalglish .", + "length": 16 + }, + { + "text": "Premier League .", + "length": 16 + }, + { + "text": "Premier League .", + "length": 16 + }, + { + 
"text": "Premier League .", + "length": 16 + }, + { + "text": "Premier League .", + "length": 16 + }, + { + "text": "Howard Kendall .", + "length": 16 + }, + { + "text": "Premier League .", + "length": 16 + }, + { + "text": "Premier League .", + "length": 16 + }, + { + "text": "Premier League .", + "length": 16 + }, + { + "text": "Bill Nicholson .", + "length": 16 + }, + { + "text": "Brendan Rodgers .", + "length": 17 + }, + { + "text": "Roberto Mancini .", + "length": 17 + }, + { + "text": "Manuel Pellegrini .", + "length": 19 + }, + { + "text": "Follow @@CraigHope01 .", + "length": 22 + }, + { + "text": "Of his first 50 matches, 24 were to end in defeat.", + "length": 50 + }, + { + "text": "The theory, of course, is that time equates to success.", + "length": 55 + }, + { + "text": "So does Moyes deserve a second season to turn things around?", + "length": 60 + }, + { + "text": "It is, however, unlikely to be a milestone he will be celebrating.", + "length": 66 + }, + { + "text": "Sunday's trip to former club Everton will be his 51st at the helm.", + "length": 66 + }, + { + "text": "David Moyes has now marked 50 games in charge of Manchester United.", + "length": 67 + }, + { + "text": "Look at our table below and the evidence suggests he probably does.", + "length": 67 + }, + { + "text": "Rather, it may well come as a point of relief that he has survived this far.", + "length": 76 + }, + { + "text": "For it is fair to say his steering job of the champions has proved a rocky ride.", + "length": 80 + }, + { + "text": "Brendan Rodgers is another who endured criticism during the early days of his reign.", + "length": 84 + }, + { + "text": "Ferguson and Wenger share an identical record of 22 victories, 18 draws and 10 defeats.", + "length": 87 + }, + { + "text": "This, then, suggests Moyes should be granted the time to reverse fortunes at Old Trafford.", + "length": 90 + }, + { + "text": "The man in charge: Arsene Wenger oversees Arsenal's match against Leeds at 
Highbury back in 1996 .", + "length": 98 + }, + { + "text": "The Special One: Jose Mourinho is unveiled at Stamford Bridge for his first stint as Chelsea manager .", + "length": 102 + }, + { + "text": "Phone a friend: Brian Clough, posing with son Nigel, and Bill Shankly (right), who managed Liverpool .", + "length": 102 + }, + { + "text": "Indeed, his predecessor Roberto Mancini earned the equivalent of 20 points less than the current incumbent.", + "length": 107 + }, + { + "text": "In black and white: Sir Alex Ferguson shared an identical start to Arsene Wenger for their first 50 matches .", + "length": 109 + }, + { + "text": "All smiles: David Moyes will mark his 51st match in charge of Manchester United against his old club Everton .", + "length": 110 + }, + { + "text": "His first half century produced just four defeats and our table has him collecting 118 points from a possible 150.", + "length": 114 + }, + { + "text": "He was promoted to the role of player-manager at Leeds United in 1961 with the Elland Road club in the second tier.", + "length": 115 + }, + { + "text": "Moyes' return of 27-9-14 is the equivalent of being six points better off (*see below for our method of calculation).", + "length": 117 + }, + { + "text": "They did, of course, go on to enjoy silverware-laden stewardships having negotiated what were indifferent beginnings.", + "length": 117 + }, + { + "text": "Of the managers who have survived testing times before subsequently flourishing, Don Revie is perhaps the best example.", + "length": 119 + }, + { + "text": "In 2004, the Portuguese, having arrived from Champions League winners Porto, guided the Blues to Premier League victory.", + "length": 120 + }, + { + "text": "Along the way there have been calls for him to go, each of United's 10 defeats triggering forensic analysis of his failings.", + "length": 124 + }, + { + "text": "Incidentally, our findings show current Manchester City boss Manuel Pellegrini to be second best to Mourinho's 
effort a decade ago.", + "length": 131 + }, + { + "text": "Today, the Liverpool boss is on the brink of sealing his club's first title in 24 years, and he's still only 93 games in to his Anfield stay.", + "length": 141 + }, + { + "text": "In the current era, no manager has enjoyed a more lucrative return from their first 50 games than Jose Mourinho during his initial spell at Chelsea.", + "length": 148 + }, + { + "text": "Many would argue that is a somewhat short-sighted stance, for a bad manager would merely worsen the plight of a club given too long to inflicts his ills.", + "length": 153 + }, + { + "text": "But history, in fact, shows that the best managers - those we house in the bracket of 'greatness' - have been afforded time despite unspectacular starts.", + "length": 153 + }, + { + "text": "The board kept faith and, by 1974 and before Revie's exit for England, they had won two league championships, two Inter-Cities Fairs Cups, one FA Cup and one League Cup.", + "length": 169 + }, + { + "text": "There are sure to be similar inquests between now and the season's end and, once the campaign does reach its conclusion, the question will be asked: Does David Moyes deserve more time?", + "length": 184 + }, + { + "text": "Indeed, Sportsmail can reveal that, over the first 50 games of their respective tenures, the likes of Sir Alex Ferguson, Arsene Wenger, Bill Shankly and Brian Clough did not fare as well as Moyes.", + "length": 196 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.6063953042030334 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:30.428557751Z", + "first_section_created": "2025-12-23T09:34:30.428958567Z", + "last_section_published": "2025-12-23T09:34:30.429259679Z", + "all_results_received": "2025-12-23T09:34:30.498035651Z", + "output_generated": "2025-12-23T09:34:30.49826456Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + 
"aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:30.428958567Z", + "publish_time": "2025-12-23T09:34:30.429259679Z", + "first_worker_start": "2025-12-23T09:34:30.429675296Z", + "last_worker_end": "2025-12-23T09:34:30.496144Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:30.429758899Z", + "start_time": "2025-12-23T09:34:30.429820302Z", + "end_time": "2025-12-23T09:34:30.429960508Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:30.429955Z", + "start_time": "2025-12-23T09:34:30.430111Z", + "end_time": "2025-12-23T09:34:30.496144Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:30.429608293Z", + "start_time": "2025-12-23T09:34:30.429675296Z", + "end_time": "2025-12-23T09:34:30.429788001Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:30.429701597Z", + "start_time": "2025-12-23T09:34:30.429753399Z", + "end_time": "2025-12-23T09:34:30.4297858Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4705, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/00448c94515b85ac8667d279217e1a2fc00448f6.json b/data/output/00448c94515b85ac8667d279217e1a2fc00448f6.json new file mode 100644 index 0000000..1c68090 --- /dev/null +++ b/data/output/00448c94515b85ac8667d279217e1a2fc00448f6.json @@ -0,0 +1,226 @@ +{ + "file_name": "00448c94515b85ac8667d279217e1a2fc00448f6.txt", + "total_words": 247, + "top_n_words": [ + { + "word": "the", + "count": 16 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "a", + "count": 7 + }, + { + "word": "at", + "count": 6 + }, + { + "word": "alligator", + "count": 5 + }, + { + "word": "school", + "count": 5 + }, + { + "word": "s", + "count": 4 + }, + { + "word": "an", + "count": 3 + }, + { + "word": "and", + "count": 3 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Associated Press .", + "length": 18 + }, + { + "text": "No injuries were reported.", + "length": 26 + }, + { + "text": "and Alex Greg for MailOnline .", + "length": 30 + }, + { + "text": "It was lassoed and removed after a brief struggle.", + "length": 50 + }, + { + "text": "'Somebody thinks today is the first day of school!", + "length": 50 + }, + { + "text": "The alligator was taken to a 'more natural habitat' than the school yard.", + "length": 73 + }, + { + "text": "The school district says classes are still scheduled to begin on August 25.", + "length": 
75 + }, + { + "text": "Authorities found the reptile across campus near some air conditioning units.", + "length": 77 + }, + { + "text": "See ya later: The gator turned up at a Texas middle school but wasn't welcomed in class .", + "length": 89 + }, + { + "text": "Fort Bend County Sheriff's Office deputies were called out to capture the animal Wednesday at Beck Junior High.", + "length": 111 + }, + { + "text": "' Caught: It took deputies hours to catch the reptile, which managed to break two ropes they were thrown around its head .", + "length": 122 + }, + { + "text": "At @katyisd BeckJH,' Fort Bend Sheriff's Department posted on its Twitter account with a picture of the cornered alligator.", + "length": 123 + }, + { + "text": "An employee alerted authorities that the seven-foot alligator was outside so they could remove it before students return campus.", + "length": 128 + }, + { + "text": "'One of our deputies is kind of a veteran at doing this type of thing,' Bob Haenel of the Fort Bend County Sheriff’s Office told ABC.", + "length": 135 + }, + { + "text": "Teachers preparing for the first day of classes at a Texas school got an early morning surprise in the form of an alligator at the back door.", + "length": 141 + }, + { + "text": "'After several attempts of trying to rope the alligator, he managed to put a towel over the gator’s head to calm him down and then roped him.", + "length": 143 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.479548841714859 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:30.930297171Z", + "first_section_created": "2025-12-23T09:34:30.930635785Z", + "last_section_published": "2025-12-23T09:34:30.930819092Z", + "all_results_received": "2025-12-23T09:34:30.992461276Z", + "output_generated": "2025-12-23T09:34:30.992563981Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + 
"section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:30.930635785Z", + "publish_time": "2025-12-23T09:34:30.930819092Z", + "first_worker_start": "2025-12-23T09:34:30.931278911Z", + "last_worker_end": "2025-12-23T09:34:30.991577Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:30.931343313Z", + "start_time": "2025-12-23T09:34:30.931388615Z", + "end_time": "2025-12-23T09:34:30.931414916Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:30.931481Z", + "start_time": "2025-12-23T09:34:30.931668Z", + "end_time": "2025-12-23T09:34:30.991577Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:30.931426117Z", + "start_time": "2025-12-23T09:34:30.931489119Z", + "end_time": "2025-12-23T09:34:30.931546822Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:30.931224709Z", + "start_time": "2025-12-23T09:34:30.931278911Z", + "end_time": "2025-12-23T09:34:30.931295011Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1410, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/0044b3c6fe4953df4f8851dec518421e7cbb1c07.json b/data/output/0044b3c6fe4953df4f8851dec518421e7cbb1c07.json new file mode 100644 index 0000000..3ca51ce --- /dev/null +++ b/data/output/0044b3c6fe4953df4f8851dec518421e7cbb1c07.json @@ -0,0 +1,234 @@ +{ + "file_name": "0044b3c6fe4953df4f8851dec518421e7cbb1c07.txt", + "total_words": 320, + "top_n_words": [ + { + "word": "the", + "count": 19 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "with", + "count": 8 + }, + { + "word": "lost", + "count": 6 + }, + { + "word": "of", + "count": 6 + }, + { + "word": "sherlock", + "count": 6 + }, + { + "word": "airport", + "count": 5 + }, + { + "word": "is", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "Puppy power!", + "length": 12 + }, + { + "text": "I really love that we're getting some help with that.", + "length": 53 + }, + { + "text": "Sherlock the beagle comes to the rescue to return a missing phone .", + "length": 67 + }, + { + "text": "'Our main goal is to return lost items to owners as fast as possible.", + "length": 69 + }, + { + "text": "'When you see the reactions of the passengers that really is amazing.", + "length": 69 + }, + { + "text": "A video released by the airline shows how Sherlock is doted on by staff.", + "length": 72 + }, + { + "text": "Star of the show: Sherlock happily poses for pictures with 
grateful passengers .", + "length": 80 + }, + { + "text": "Reunited: One man shows his delight as his expensive headphones are returned to him .", + "length": 85 + }, + { + "text": "On the trail: The dog sniffs the item then dashes through the airport to find its owner .", + "length": 89 + }, + { + "text": "Leaps and bounds: The unusual lost and found system has delighted passengers at Amsterdam Schipol .", + "length": 99 + }, + { + "text": "Sandra List, a member of KLM's Team Lost \u0026 Found said: 'I think he is a real asset to the company .", + "length": 99 + }, + { + "text": "' But rescuing abandoned items from planes is a tough job and Sherlock certainly needed some training.", + "length": 102 + }, + { + "text": "His handler Dirk van Driel, explained: 'We train for muscle strength endurance and of course socialisation.", + "length": 107 + }, + { + "text": "Lost and Found at Amsterdam airport just got a whole lot cuter, with the introduction of an investigative beagle.", + "length": 113 + }, + { + "text": "One woman who works on KLM's check-in desk said: 'He drops by regularly but I have to admit I do spoil him a bit.", + "length": 113 + }, + { + "text": "Airline KLM has employed a cute, uniform-wearing dog to help reunite passengers with lost items that they leave behind on planes.", + "length": 129 + }, + { + "text": "' Sherlock has become something of a star at the airport, with grateful customers posing for selfies with him and staff showing him with attention.", + "length": 147 + }, + { + "text": "The pooch - appropriately named Sherlock - uses its tracking skills to smell the lost item then dash through Amsterdam Schipol Airport to find the appropriate owner.", + "length": 167 + }, + { + "text": "The energetic beagle can be seen bounding through the airport with mobiles and headphones tucked in his pouch and even carrying stuffed toys back to their rightful owners.", + "length": 171 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 
0.45116737484931946 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:31.431129655Z", + "first_section_created": "2025-12-23T09:34:31.43149537Z", + "last_section_published": "2025-12-23T09:34:31.431684877Z", + "all_results_received": "2025-12-23T09:34:31.491450486Z", + "output_generated": "2025-12-23T09:34:31.491589791Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:31.43149537Z", + "publish_time": "2025-12-23T09:34:31.431684877Z", + "first_worker_start": "2025-12-23T09:34:31.432299002Z", + "last_worker_end": "2025-12-23T09:34:31.490642Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:31.432309502Z", + "start_time": "2025-12-23T09:34:31.432368605Z", + "end_time": "2025-12-23T09:34:31.432405006Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:31.432485Z", + "start_time": "2025-12-23T09:34:31.432668Z", + "end_time": "2025-12-23T09:34:31.490642Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:31.4322608Z", + "start_time": "2025-12-23T09:34:31.432318303Z", + "end_time": "2025-12-23T09:34:31.432361104Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:31.432228499Z", + "start_time": "2025-12-23T09:34:31.432299002Z", + "end_time": "2025-12-23T09:34:31.432318303Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + 
"max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1861, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git a/data/output/0044b95b26f3a0b21da4568939e2f50c7597becf.json b/data/output/0044b95b26f3a0b21da4568939e2f50c7597becf.json new file mode 100644 index 0000000..2884685 --- /dev/null +++ b/data/output/0044b95b26f3a0b21da4568939e2f50c7597becf.json @@ -0,0 +1,444 @@ +{ + "file_name": "0044b95b26f3a0b21da4568939e2f50c7597becf.txt", + "total_words": 869, + "top_n_words": [ + { + "word": "the", + "count": 66 + }, + { + "word": "of", + "count": 29 + }, + { + "word": "to", + "count": 26 + }, + { + "word": "in", + "count": 25 + }, + { + "word": "a", + "count": 21 + }, + { + "word": "horses", + "count": 17 + }, + { + "word": "wild", + "count": 16 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "herd", + "count": 11 + }, + { + "word": "island", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Mr .", + "length": 4 + }, + { + "text": 
"UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "to approach.", + "length": 12 + }, + { + "text": "'The horses .", + "length": 13 + }, + { + "text": "them for meat.", + "length": 14 + }, + { + "text": "Amanda Williams .", + "length": 17 + }, + { + "text": "closer than a kilometre.", + "length": 24 + }, + { + "text": "05:09 EST, 11 July 2013 .", + "length": 25 + }, + { + "text": "05:48 EST, 11 July 2013 .", + "length": 25 + }, + { + "text": "rules of the Rostov Nature Reserve.", + "length": 35 + }, + { + "text": "'The herd didn't allow people to come .", + "length": 39 + }, + { + "text": "A wild horse gallops along Vodny Island.", + "length": 40 + }, + { + "text": "A wild mare stands alert by her young foal.", + "length": 43 + }, + { + "text": "5km wide, and has not a single tree or bush.", + "length": 44 + }, + { + "text": "The alpha stallion would run out to face an .", + "length": 45 + }, + { + "text": "5km wide, and has not a single tree or bush .", + "length": 45 + }, + { + "text": "He said: 'I've been around horses my whole life.", + "length": 48 + }, + { + "text": "grasses year-round and surviving without shelter.", + "length": 49 + }, + { + "text": "They are known as the wild horses of Vodny Island.", + "length": 50 + }, + { + "text": "The reserve rangers halted poaching of horses for meat.", + "length": 55 + }, + { + "text": "horses was relatively small, hovering around just a few dozen.", + "length": 62 + }, + { + "text": "A wild stallion rears up on his feet in front of three others.", + "length": 62 + }, + { + "text": "'The herd didn't allow people to come closer than a kilometre.", + "length": 62 + }, + { + "text": "Shpilenok added: 'Over time the horses developed an innate fear of .", + "length": 68 + }, + { + "text": "humans and machinery due to periodic attempts to catch them or shoot .", + "length": 70 + }, + { + "text": "adapted to living on their own in the dry steppe, feeding on natural .", + 
"length": 70 + }, + { + "text": "the Rostov Nature Reserve was created in 1995, the population of wild .", + "length": 71 + }, + { + "text": "'In 1995, the Rostov Nature Reserve was created, including Vodny Island.", + "length": 72 + }, + { + "text": "intruder, pawing his hooves aggressively, quelling any visitor's desire .", + "length": 73 + }, + { + "text": "It is the largest Island in the Manych-Gudilo Reservoir, at 12km long and up to 3.", + "length": 82 + }, + { + "text": "It is the largest Island in the Manych-Gudilo Reservoir, at 12km long and up to 3.", + "length": 82 + }, + { + "text": "The beautiful horses were cut off from the mainland when a canal was built in 1953.", + "length": 83 + }, + { + "text": "The beautiful horses were cut off from the mainland when a canal was built in 1953 .", + "length": 84 + }, + { + "text": "'In the first few years of the reserve's existence, the horses remained wary of humans.", + "length": 87 + }, + { + "text": "Nature staff have installed a 500m pipe from the mainland to provide fresh water to the horses.", + "length": 95 + }, + { + "text": "These are the wild Mustangs of Manych - who have been marooned on Russia's Vodny island for 60 years.", + "length": 101 + }, + { + "text": "' Nature staff have since installed a 500m pipe from the mainland to provide fresh water to the horses.", + "length": 103 + }, + { + "text": "A nature reserve maintains herd numbers at about 100-150 animals by capturing some of the young horses .", + "length": 104 + }, + { + "text": "It formed in 1953, when construction of the Nevinnomysky Canal transformed the depth of the Manych River.", + "length": 105 + }, + { + "text": "They have since bred into a huge herd, believed to be the largest and longest lived wild population in Russia.", + "length": 110 + }, + { + "text": "Today the nature reserve maintains herd numbers at about 100-150 animals by capturing some of the young horses.", + "length": 111 + }, + { + "text": "They have since bred into a 
huge herd, believed to be the largest and longest lived wild population in Russia .", + "length": 111 + }, + { + "text": "The herd formed in 1953, when construction of the Nevinnomysky Canal transformed the depth of the Manych River .", + "length": 112 + }, + { + "text": "The rest of the creatures remain wild and are afforded protection by the strict rules of the Rostov Nature Reserve .", + "length": 116 + }, + { + "text": "During my first trip to Rostov Nature Reserve in 1999 I couldn't even get close enough to photograph the wild animals.", + "length": 118 + }, + { + "text": "They had originally been tame, but after getting a taste of freedom the herd has now become too feral to corral again.", + "length": 118 + }, + { + "text": "Vodny Island - which translates as 'Water Island' - is surrounded by the saline waters of the Manych-Gudilo Reservoir.", + "length": 118 + }, + { + "text": "The herd grew to over 400 in 2006 but more than half of them were wiped out during a particularly harsh winter in 2009 .", + "length": 120 + }, + { + "text": "' Vodny Island - which translates as 'Water Island' - is surrounded by the saline waters of the Manych-Gudilo Reservoir.", + "length": 120 + }, + { + "text": "The herd grew to over 400 in 2006 but more than half of them were wiped out during a particularly harsh winter in 2009/10.", + "length": 122 + }, + { + "text": "These are the wild Mustangs of Manych - who have been all but forgotten after becoming marooned on Russia's Vodny island for 60 years .", + "length": 135 + }, + { + "text": "Since they were marooned, the horses have bred into a huge herd, believed to be the largest and longest lived wild population in Russia .", + "length": 137 + }, + { + "text": "The alpha stallion would run out to face an intruder, pawing his hooves aggressively, quelling any visitor's desire to approach' Before .", + "length": 137 + }, + { + "text": "Before the Rostov Nature Reserve was created in 1995, the population of wild horses was 
relatively small, hovering around just a few dozen .", + "length": 140 + }, + { + "text": "Mr Shpilenok added: 'Over time the horses developed an innate fear of humans and machinery due to periodic attempts to catch them or shoot them for meat .", + "length": 154 + }, + { + "text": "But the wild horses I had the opportunity to observe in the Rostov Nature Reserve in the southern part of European Russia are a completely different story.", + "length": 155 + }, + { + "text": "'Observing them is no less interesting than watching wild bears in Kamchatka, especially now, in springtime, when blood full of equine passion courses through their veins.", + "length": 171 + }, + { + "text": "These photographs were captured by Russian wildlife photographer Igor Shpilenok, 52, who described them as 'truly impressive' He said watching them was like watching bears, adding: 'especially now, in springtime, when blood full of equine passion courses through their veins' These photographs were captured by Russian wildlife photographer Igor Shpilenok, 52, who described them as 'truly impressive'.", + "length": 402 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6167840659618378 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:31.931850834Z", + "first_section_created": "2025-12-23T09:34:31.933857815Z", + "last_section_published": "2025-12-23T09:34:31.934172928Z", + "all_results_received": "2025-12-23T09:34:32.013465023Z", + "output_generated": "2025-12-23T09:34:32.01363463Z", + "total_processing_time_ms": 81, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 79, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:31.933857815Z", + "publish_time": "2025-12-23T09:34:31.934053723Z", + "first_worker_start": "2025-12-23T09:34:31.934528542Z", + "last_worker_end": "2025-12-23T09:34:32.012501Z", + "total_journey_time_ms": 78, + "worker_timings": { + 
"sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:31.934549043Z", + "start_time": "2025-12-23T09:34:31.934620446Z", + "end_time": "2025-12-23T09:34:31.93471695Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:31.934853Z", + "start_time": "2025-12-23T09:34:31.935006Z", + "end_time": "2025-12-23T09:34:32.012501Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 77 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:31.934611945Z", + "start_time": "2025-12-23T09:34:31.934687548Z", + "end_time": "2025-12-23T09:34:31.934802653Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:31.934460439Z", + "start_time": "2025-12-23T09:34:31.934528542Z", + "end_time": "2025-12-23T09:34:31.934624046Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:31.934081424Z", + "publish_time": "2025-12-23T09:34:31.934172928Z", + "first_worker_start": "2025-12-23T09:34:31.934609445Z", + "last_worker_end": "2025-12-23T09:34:31.963529Z", + "total_journey_time_ms": 29, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:31.934691748Z", + "start_time": "2025-12-23T09:34:31.93472045Z", + "end_time": "2025-12-23T09:34:31.93472445Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:31.934963Z", + "start_time": "2025-12-23T09:34:31.935096Z", + "end_time": "2025-12-23T09:34:31.963529Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 28 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:31.934687948Z", + "start_time": "2025-12-23T09:34:31.934713349Z", + "end_time": "2025-12-23T09:34:31.93471725Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:31.934580744Z", + "start_time": "2025-12-23T09:34:31.934609445Z", + "end_time": "2025-12-23T09:34:31.934611645Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 105, + "min_processing_ms": 28, + "max_processing_ms": 77, + "avg_processing_ms": 52, + "median_processing_ms": 77, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2517, + "slowest_section_id": 0, + "slowest_section_time_ms": 78 + } +} diff --git a/data/output/0044c1120f5fd8d6d7947d839e5cb62fd46464a5.json b/data/output/0044c1120f5fd8d6d7947d839e5cb62fd46464a5.json new file mode 100644 index 0000000..112bdd6 --- /dev/null +++ b/data/output/0044c1120f5fd8d6d7947d839e5cb62fd46464a5.json @@ -0,0 +1,230 @@ +{ + "file_name": "0044c1120f5fd8d6d7947d839e5cb62fd46464a5.txt", + "total_words": 415, + "top_n_words": [ + { + "word": "the", + "count": 20 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "to", 
+ "count": 13 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "abuse", + "count": 7 + }, + { + "word": "have", + "count": 7 + }, + { + "word": "as", + "count": 6 + }, + { + "word": "victims", + "count": 6 + }, + { + "word": "and", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "’ ‘You are dealing with a massive, massive problem.", + "length": 55 + }, + { + "text": "‘We look upon child abuse and it's impact now as a national health epidemic.", + "length": 78 + }, + { + "text": "Mr Wilmer, an abuse victim himself, told Sky News: ‘There are potentially about 11.", + "length": 85 + }, + { + "text": "He said his estimation was based on ‘prevalence rates published by the Government’.", + "length": 87 + }, + { + "text": "One in six people in Britain may have been sexually abused, a government adviser has warned.", + "length": 92 + }, + { + "text": "Mrs May has apologised to victims for failing so far to find a suitable person to fill the role.", + "length": 96 + }, + { + "text": "Meanwhile, children's charity the NSPCC estimates one in 20 children in the UK have been sexually abused.", + "length": 105 + }, + { + "text": "The Government's inquiry has been dogged with problems, mainly around finding a chairman, since it was announced in July.", + "length": 121 + }, + { + "text": "Britain's Home Secretary Theresa May appointed Mr Wilmer to sit as an independent member of the inquiry into historic sex abuse .", + "length": 129 + }, + { + "text": "’ According to Sky News, statistics show one in six boys under 16 have been sexually abused - for girls the figure is one in four.", + "length": 132 + }, + { + "text": "Graham Wilmer was awarded an MBE by Prince Charles in January after years of campaigning for more support for sexual abuse victims .", + "length": 132 + }, + { + "text": "Mr Wilmer, who set up the Lantern Project charity to support victims, said the scale of abuse constituted a ‘national health epidemic’.", 
+ "length": 139 + }, + { + "text": "7 million victims out there at the moment who have not disclosed, and many of those people will start to come forward in very significant numbers.", + "length": 146 + }, + { + "text": "Graham Wilmer – appointed by Theresa May to the controversial historic sex abuse inquiry – said there could be as many as 11 million victims in the UK.", + "length": 155 + }, + { + "text": "Baroness Butler-Sloss stood down as chairwoman in July amid questions over the role played by her late brother, Lord Havers, who was attorney general in the 1980s.", + "length": 163 + }, + { + "text": "Her replacement Fiona Woolf, the then Lord Mayor of London, resigned in October following a barrage of criticism over her ''establishment links'', most notably in relation to former home secretary Lord Brittan.", + "length": 210 + }, + { + "text": "From what we have seen, if you don't provide the right level of support and intervention to support people when they come forward you see very significant health problems, mental health and physical health, which have a direct cost to us as a society.", + "length": 251 + }, + { + "text": "And last week a number of alleged victims of child sexual abuse have said they will withdraw from the inquiry into the issue unless the Government makes major changes to it, including extending the period of time it will cover to further back than 1970.", + "length": 253 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6682436466217041 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:32.434911107Z", + "first_section_created": "2025-12-23T09:34:32.436830785Z", + "last_section_published": "2025-12-23T09:34:32.436981891Z", + "all_results_received": "2025-12-23T09:34:32.498220159Z", + "output_generated": "2025-12-23T09:34:32.498363965Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + 
"section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:32.436830785Z", + "publish_time": "2025-12-23T09:34:32.436981891Z", + "first_worker_start": "2025-12-23T09:34:32.437581515Z", + "last_worker_end": "2025-12-23T09:34:32.497354Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:32.437535413Z", + "start_time": "2025-12-23T09:34:32.437592716Z", + "end_time": "2025-12-23T09:34:32.437632917Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:32.437741Z", + "start_time": "2025-12-23T09:34:32.437904Z", + "end_time": "2025-12-23T09:34:32.497354Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:32.437572215Z", + "start_time": "2025-12-23T09:34:32.437653318Z", + "end_time": "2025-12-23T09:34:32.437720321Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:32.437517312Z", + "start_time": "2025-12-23T09:34:32.437581515Z", + "end_time": "2025-12-23T09:34:32.437603516Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2443, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/00459587adf060c93966ce7f1e2e33fae1d87f7c.json b/data/output/00459587adf060c93966ce7f1e2e33fae1d87f7c.json new file mode 100644 index 0000000..787ca6f --- /dev/null +++ b/data/output/00459587adf060c93966ce7f1e2e33fae1d87f7c.json @@ -0,0 +1,266 @@ +{ + "file_name": "00459587adf060c93966ce7f1e2e33fae1d87f7c.txt", + "total_words": 464, + "top_n_words": [ + { + "word": "the", + "count": 20 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "said", + "count": 9 + }, + { + "word": "wade", + "count": 9 + }, + { + "word": "i", + "count": 7 + }, + { + "word": "it", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "\"And that's the lesson for parents.", + "length": 35 + }, + { + "text": "\"There's no face-to-face interaction.", + "length": 37 + }, + { + "text": "Pay close attention to your children.", + "length": 37 + }, + { + "text": "Watch their social networking outlets.", + "length": 38 + }, + { + "text": "Watch how they talk and who they talk to.", + "length": 41 + }, + { + "text": "until two good girls, their worlds collided,\" he said.", + "length": 54 + }, + { + "text": "A life sentence was recommended by Florida prosecutors.", + "length": 55 + }, + { + "text": "\" \"The murder was no accident,\" Judge Joseph Bulone said.", + 
"length": 57 + }, + { + "text": "\" Wade's lawyer told HLN Friday that the sentence was \"very fair.", + "length": 65 + }, + { + "text": "\"I don't think we can appreciate how young people talk,\" he said.", + "length": 65 + }, + { + "text": "\" In Session Correspondent Beth Karas contributed to this report.", + "length": 65 + }, + { + "text": "The defense had recommended 15 years, followed by 15 years of probation.", + "length": 72 + }, + { + "text": "\" \"I just don't think this was a case that called for life,\" said Jay Hebert.", + "length": 77 + }, + { + "text": "TruTV's \"In Session\" correspondent Beth Karas spoke to Wade days before her sentencing.", + "length": 87 + }, + { + "text": "The feud culminated in a fatal confrontation in the early morning hours of April 15, 2009.", + "length": 90 + }, + { + "text": "Wade had claimed self-defense and hoped for an acquittal or no more than a manslaughter conviction.", + "length": 99 + }, + { + "text": "Hebert said Wade has resolved to teach young people about the dangers associated with social networking.", + "length": 104 + }, + { + "text": "\"I never meant to do it, and I'm still gonna have to live with it, no matter if I'm home or if I'm in prison.", + "length": 109 + }, + { + "text": "Wade went to trial in July, accused of second-degree murder in the stabbing death of 18-year-old Sarah Ludemann.", + "length": 112 + }, + { + "text": "\"I think about it every day, regardless if they give me five years or 20 years more than they could give me,\" Wade said.", + "length": 120 + }, + { + "text": "Hebert said the case is a cautionary tale about the potentially deadly mix of young people and modern communications technology.", + "length": 128 + }, + { + "text": "After a three-day trial and only two and a half hours of deliberation, a jury of five men and one woman convicted Wade of second-degree murder.", + "length": 143 + }, + { + "text": "While acknowledging mitigating factors -- primarily Wade's youth and lack of a 
criminal past -- the judge said her actions were not \"unaggravating.", + "length": 147 + }, + { + "text": "\" \"Because it's an explosive situation when when you don't have to be accountable, when you can break up with somebody or ask somebody to prom via text,\" he said.", + "length": 162 + }, + { + "text": "The two women, only teenagers at the time, had fought for months via voicemails, text messages and MySpace postings over their relationship with the same man, Joshua Camacho.", + "length": 174 + }, + { + "text": "(CNN) -- A Florida judge sentenced Rachel Wade, the 20-year-old woman convicted of second-degree murder for fatally stabbing her romantic rival in a fight last year, to 27 years in prison Friday.", + "length": 195 + }, + { + "text": "\"When you start looking at the tragic nature of this, the social networking, the instant messaging, the ability of people to hide behind the screen and make statements and create situations -- it just festered until it bubbled up and exploded into a situation...", + "length": 262 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7239944338798523 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:32.937737871Z", + "first_section_created": "2025-12-23T09:34:32.938115987Z", + "last_section_published": "2025-12-23T09:34:32.938252292Z", + "all_results_received": "2025-12-23T09:34:33.004360556Z", + "output_generated": "2025-12-23T09:34:33.004553964Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:32.938115987Z", + "publish_time": "2025-12-23T09:34:32.938252292Z", + "first_worker_start": "2025-12-23T09:34:32.938834516Z", + "last_worker_end": "2025-12-23T09:34:33.003381Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": 
"2025-12-23T09:34:32.938833416Z", + "start_time": "2025-12-23T09:34:32.938909019Z", + "end_time": "2025-12-23T09:34:32.938954921Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:32.939028Z", + "start_time": "2025-12-23T09:34:32.939153Z", + "end_time": "2025-12-23T09:34:33.003381Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:32.938751912Z", + "start_time": "2025-12-23T09:34:32.938834516Z", + "end_time": "2025-12-23T09:34:32.938886618Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:32.938786414Z", + "start_time": "2025-12-23T09:34:32.938846216Z", + "end_time": "2025-12-23T09:34:32.938867617Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2652, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/0045b427592bc932808bbf17b319d796c861f840.json b/data/output/0045b427592bc932808bbf17b319d796c861f840.json new file mode 100644 index 0000000..8b0c512 --- /dev/null +++ b/data/output/0045b427592bc932808bbf17b319d796c861f840.json @@ -0,0 +1,486 @@ +{ + "file_name": "0045b427592bc932808bbf17b319d796c861f840.txt", + "total_words": 709, + "top_n_words": [ + { + "word": "the", + "count": 43 + }, + { + "word": "to", + "count": 27 + }, + { + "word": "in", + "count": 20 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "s", + "count": 14 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "by", + "count": 7 + }, + { + "word": "for", + "count": 7 + }, + { + "word": "league", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "A .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "His .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "£678.", + "length": 6 + }, + { + "text": "Such .", + "length": 6 + }, + { + "text": "FIFA .", + "length": 6 + }, + { + "text": "There .", + "length": 7 + }, + { + "text": "Retired .", + "length": 9 + }, + { + "text": "Cricket .", + "length": 9 + }, + { + "text": "Premier .", + "length": 9 + }, + { + "text": "Halford, .", + "length": 10 + }, + { + "text": "’ FIFA .", + "length": 10 + }, + { + "text": "information.", + "length": 12 + }, + { + "text": "Charles Sale .", + "length": 14 + }, + { + "text": "’ Pietersen .", + "length": 15 + }, + { + "text": "in the extreme.", + "length": 15 + }, + { + "text": "the sexism scandal.", + "length": 19 + }, + { + "text": "MBN Promotions are .", + "length": 20 + }, + { + "text": "about the MCC design.", + "length": 21 + }, + { + "text": "Follow @@charliesale .", + "length": 22 + }, + { + "text": "tell all on October 16.", 
+ "length": 23 + }, + { + "text": "certain Sir Elton John.", + "length": 23 + }, + { + "text": "KP’s food for thought .", + "length": 25 + }, + { + "text": "were bound to be hiccups.", + "length": 25 + }, + { + "text": "football’s ruling body.", + "length": 25 + }, + { + "text": "The FA declined to comment.", + "length": 27 + }, + { + "text": "agreement expires in September.", + "length": 31 + }, + { + "text": "second-tier side Clermont Foot.", + "length": 31 + }, + { + "text": "opportunity to raise awareness.", + "length": 31 + }, + { + "text": "It was approved by relevant FIFA .", + "length": 34 + }, + { + "text": "another table, and invited him over.", + "length": 36 + }, + { + "text": "Everton chairman and theatre tycoon Bill .", + "length": 42 + }, + { + "text": "and serious misdemeanours than Scudamore’s.", + "length": 45 + }, + { + "text": "Zurich say they have been considering such a .", + "length": 46 + }, + { + "text": "But there is a desire among them for their next .", + "length": 49 + }, + { + "text": "schools but not necessarily in terms of football.", + "length": 49 + }, + { + "text": "Kevin Pietersen’s version of his England divorce.", + "length": 51 + }, + { + "text": "An LTA spokeswoman said the initiative to give full .", + "length": 53 + }, + { + "text": "progress following serious illness, is unable to return.", + "length": 56 + }, + { + "text": "Kenwright bumped into the former Watford owner, who was on .", + "length": 60 + }, + { + "text": "committees with the investment coming from the current budget.", + "length": 62 + }, + { + "text": "confidentiality agreement will prevent him revealing too much.", + "length": 62 + }, + { + "text": "football coach in a major league after taking charge of French .", + "length": 64 + }, + { + "text": "is the hypocrisy around the Premier League that title sponsors .", + "length": 64 + }, + { + "text": "group of Premier League chairmen and chief executives informally .", + "length": 66 + }, + { + "text": 
"Leuviah Films production ‘as we considered this to be a unique .", + "length": 66 + }, + { + "text": "wine merchant Lionel Frumkin, who lives next to the proposed new .", + "length": 66 + }, + { + "text": "chairman to be from outside football if Anthony Fry, making good .", + "length": 67 + }, + { + "text": "43m since 2008 to cover fines for a variety of far more fraudulent .", + "length": 68 + }, + { + "text": "have been numerous glitches in the Lawn Tennis Association’s new .", + "length": 68 + }, + { + "text": "have defended president Sepp Blatter’s unilateral spending of an .", + "length": 68 + }, + { + "text": "will have a column in a national newspaper from June 1, but his ECB .", + "length": 69 + }, + { + "text": "fans willing to pay £2,000 for a table of 10 will be first to hear .", + "length": 69 + }, + { + "text": "the effect that women teachers had greatly improved the education in .", + "length": 70 + }, + { + "text": "Wimbledon ballot tickets at all LTA clubs, including losing personal .", + "length": 70 + }, + { + "text": "League clubs unanimously backed chief executive Richard Scudamore in .", + "length": 70 + }, + { + "text": "who represents the Manchester county FA on the council, said words to .", + "length": 71 + }, + { + "text": "proposal since 2004 and ‘agreed to contribute financially’ to the .", + "length": 71 + }, + { + "text": "staging the lunch - hosted by John Inverdale - at the Savoy for KP to .", + "length": 71 + }, + { + "text": "comes in the week Helena Costa became the first female  professional .", + "length": 71 + }, + { + "text": "ill-timed remark - albeit supported by some dinosaur council members - .", + "length": 72 + }, + { + "text": "ahead of their summer meeting were joined by one former club owner - a .", + "length": 72 + }, + { + "text": "Barclays, who were reported to be ‘deeply disappointed’ in Premier .", + "length": 72 + }, + { + "text": "computer system being rolled out after two years of trials to deal with .", + 
"length": 73 + }, + { + "text": "entrance at Lord’s, was leading the complaints to Westminster Council .", + "length": 73 + }, + { + "text": "transparency to the club ballot process had been well received and there .", + "length": 74 + }, + { + "text": "hope to recoup the money from revenue generated by the film - optimistic .", + "length": 74 + }, + { + "text": "alleged £16m on the film United Passions, charting the history of world .", + "length": 74 + }, + { + "text": "League chief executive Scudamore’s sexist emails, have had to set aside .", + "length": 75 + }, + { + "text": "discussing the big issues during a lunch at Scott’s in London’s Mayfair .", + "length": 77 + }, + { + "text": ".............................................................................", + "length": 77 + }, + { + "text": "An MCC spokesman said it was club policy to acquire properties adjacent to the ground.", + "length": 86 + }, + { + "text": "Support: Premier League clubs have backed chief executive Richard Scudamore in the sexism scandal .", + "length": 99 + }, + { + "text": "Two sides to every story: Cricket fans can pay to hear Kevin Pietersen's version of his England divorce .", + "length": 105 + }, + { + "text": "In attendance: Sir Elton John, former owner of Watford, joined Premier League chairmen at a lunch in Mayfair .", + "length": 110 + }, + { + "text": "MBN say: ‘We hear from the man himself exactly what went wrong last winter and how relationships deteriorated so badly.", + "length": 121 + }, + { + "text": "However, Lord’s tour guide Frumkin withdrew his protest when MCC paid £8m - well above the market value - to buy his house.", + "length": 126 + }, + { + "text": "Halford caused a stir in the room by effectively blaming the increase in women teachers for the chronic problems facing schools football.", + "length": 137 + }, + { + "text": "Row: Man City administrator Bernard Halford, pictured with Pablo Zabaleta, involved the FA in their own sexist comment moment this week 
.", + "length": 137 + }, + { + "text": "Manchester City’s long-serving administrator Bernard Halford involved the FA council in their own sexist comment moment in the wake of the Richard Scudamore controversy.", + "length": 171 + }, + { + "text": "City life president Halford, the club secretary for four decades, was speaking about the decline of schools football at the council summit this week in response to FA chairman Greg Dyke’s England Commission presentation.", + "length": 222 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7697592377662659 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:33.438970071Z", + "first_section_created": "2025-12-23T09:34:33.440365327Z", + "last_section_published": "2025-12-23T09:34:33.440566736Z", + "all_results_received": "2025-12-23T09:34:33.499306703Z", + "output_generated": "2025-12-23T09:34:33.499500311Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:33.440365327Z", + "publish_time": "2025-12-23T09:34:33.440566736Z", + "first_worker_start": "2025-12-23T09:34:33.441130058Z", + "last_worker_end": "2025-12-23T09:34:33.498502Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:33.441107057Z", + "start_time": "2025-12-23T09:34:33.44117676Z", + "end_time": "2025-12-23T09:34:33.441276464Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:33.441352Z", + "start_time": "2025-12-23T09:34:33.441538Z", + "end_time": "2025-12-23T09:34:33.498502Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:33.441048655Z", + "start_time": 
"2025-12-23T09:34:33.441130058Z", + "end_time": "2025-12-23T09:34:33.441223162Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:33.441075956Z", + "start_time": "2025-12-23T09:34:33.44116246Z", + "end_time": "2025-12-23T09:34:33.441210161Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4547, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/004601c9b2e7c19d43581c3ef5d26a63d2b30a32.json b/data/output/004601c9b2e7c19d43581c3ef5d26a63d2b30a32.json new file mode 100644 index 0000000..1e1c3a8 --- /dev/null +++ b/data/output/004601c9b2e7c19d43581c3ef5d26a63d2b30a32.json @@ -0,0 +1,366 @@ +{ + "file_name": "004601c9b2e7c19d43581c3ef5d26a63d2b30a32.txt", + "total_words": 702, + "top_n_words": [ + { + "word": 
"the", + "count": 55 + }, + { + "word": "his", + "count": 20 + }, + { + "word": "of", + "count": 17 + }, + { + "word": "he", + "count": 16 + }, + { + "word": "hunter", + "count": 15 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "s", + "count": 14 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "at", + "count": 12 + }, + { + "word": "was", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "When .", + "length": 6 + }, + { + "text": "Flags .", + "length": 7 + }, + { + "text": "ceiling.", + "length": 8 + }, + { + "text": "'It’s .", + "length": 9 + }, + { + "text": "Hunter's .", + "length": 10 + }, + { + "text": "Wallenpaupack .", + "length": 15 + }, + { + "text": "horribly tragic.", + "length": 16 + }, + { + "text": "They arrived at .", + "length": 17 + }, + { + "text": "Jessica Jerreat .", + "length": 17 + }, + { + "text": "can never replace.", + "length": 18 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "usually caught to school.", + "length": 25 + }, + { + "text": "Lieutenant Chris Paris said.", + "length": 28 + }, + { + "text": "According to the Pocono Record, .", + "length": 33 + }, + { + "text": "40 caliber handgun with a laser sight.", + "length": 38 + }, + { + "text": "'The family has lost something that they .", + "length": 42 + }, + { + "text": "His teachers, counselors, people in the building.", + "length": 49 + }, + { + "text": "The family is devastated,' police station commander .", + "length": 53 + }, + { + "text": "parents were not with him at the time of the shooting.", + "length": 54 + }, + { + "text": "the grandparents' home shortly after police were called.", + "length": 56 + }, + { + "text": "single bullet hole could be seen in the corner of the room.", + "length": 59 + }, + { + "text": "Hunter's body was found on the floor of his uncle's bedroom, and a .", + "length": 68 + }, + { + "text": "it very difficult, staff who worked with Hunter 
throughout the years.", + "length": 69 + }, + { + "text": "outside the main school building were lowered to half-mast on Monday .", + "length": 70 + }, + { + "text": "and there was a single bullet hole in the corner of the room, near the .", + "length": 72 + }, + { + "text": "area superintendent Michael Silsby added: 'Not only children are taking .", + "length": 73 + }, + { + "text": "police arrived, the child’s body was found on the floor of the bedroom .", + "length": 74 + }, + { + "text": "morning, and grief counselors traveled on the school bus the 11-year-old .", + "length": 74 + }, + { + "text": "The gun had been laid on the washing machine in the laundry room after the shooting.", + "length": 84 + }, + { + "text": "'It’s hard, we’re going to miss Hunter very much,' school principal Amanda Cykosky said.", + "length": 92 + }, + { + "text": "Grief: Family and friends are mourning the loss of Hunter, pictured above with his two sisters .", + "length": 96 + }, + { + "text": "Tribute: Flags at Hunter's school were lowered on Monday as the community marked his sad passing .", + "length": 98 + }, + { + "text": "'Look, you have a red dot on your forehead,' Olm told police he remembered one of the boys saying.", + "length": 98 + }, + { + "text": "' A fundraising page has been set up by friends of the family, to help them cover funeral expenses.", + "length": 99 + }, + { + "text": "Tragic: Hunter Pedersen was shot in the head on Saturday evening as his uncle showed off his handguns .", + "length": 103 + }, + { + "text": "Accident: Chad Olm told police he didn't think the gun was loaded when he pointed it at his nephew's head .", + "length": 107 + }, + { + "text": "Hunter's father said that he doesn't believe Olm intended to hurt his son, but he is angry at his brother-in-law.", + "length": 113 + }, + { + "text": "Arrest: Chad Olm, left, is facing criminal homicide charges over the shooting of his 11-year-old nephew, Hunter .", + "length": 113 + }, + { + "text": "' 
Hunter's friends and teachers at his Wallenpaupack district school are also mourning the loss of the popular student.", + "length": 119 + }, + { + "text": "He said that when the two boys asked to see the gun collection, he removed three weapons from a safe, including a Glock 27 .", + "length": 124 + }, + { + "text": "According to WNEP, he told police he pointed the laser on the walls and ceilings before pointing it at his nephew's forehead.", + "length": 125 + }, + { + "text": "Collection: Olm was showing Hunter and his own son a Glock handgun, like the one pictured, which is fitted with a laser sight .", + "length": 127 + }, + { + "text": "Olm, who lives in the basement of the grandparents' home, had been showing the schoolboy and his own son his collection of guns.", + "length": 128 + }, + { + "text": "He said when Hunter, who has two sisters, reached for the gun, he pulled the trigger and a single bullet hit the boy above the eye.", + "length": 131 + }, + { + "text": "Hunter's father, Robert Pederson, was laid off at the start of April which has left the family without insurance to cover expenses.", + "length": 131 + }, + { + "text": "Hunter Pedersen had been visiting his grandparents' house at a gated community in Delaware when he was shot above the eye by Chad Olm.", + "length": 134 + }, + { + "text": "Devastated: Hunter's parents are distraught, after their 11-year-old, pictured with his father, Robert, was shot dead while at his grandparents .", + "length": 145 + }, + { + "text": "An 11-year-old boy from New Jersey was shot dead on Saturday when his uncle, who was showing off a handgun, pointed its laser at the child's head.", + "length": 146 + }, + { + "text": "The 34-year-old, who has been charged with criminal homicide and endangering the welfare of a child, told police he didn't believe the gun was loaded.", + "length": 150 + }, + { + "text": "Olm told police he didn't keep his guns loaded, and said he didn't check to see if the handgun, which didn't 
have a magazine in it at the time, had any rounds of ammunition in it.", + "length": 179 + }, + { + "text": "Olm is being held in Pike County correctional facility, where he is facing charges of criminal homicide, recklessly endangering another person, and endangering the welfare of children.", + "length": 184 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6839590668678284 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:33.941344017Z", + "first_section_created": "2025-12-23T09:34:33.941718632Z", + "last_section_published": "2025-12-23T09:34:33.94190834Z", + "all_results_received": "2025-12-23T09:34:34.007377278Z", + "output_generated": "2025-12-23T09:34:34.007556185Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:33.941718632Z", + "publish_time": "2025-12-23T09:34:33.94190834Z", + "first_worker_start": "2025-12-23T09:34:33.942564866Z", + "last_worker_end": "2025-12-23T09:34:34.006333Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:33.942569166Z", + "start_time": "2025-12-23T09:34:33.942638769Z", + "end_time": "2025-12-23T09:34:33.942708672Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:33.942775Z", + "start_time": "2025-12-23T09:34:33.942926Z", + "end_time": "2025-12-23T09:34:34.006333Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:33.942492563Z", + "start_time": "2025-12-23T09:34:33.942572667Z", + "end_time": "2025-12-23T09:34:33.94266427Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + 
"receive_time": "2025-12-23T09:34:33.942498964Z", + "start_time": "2025-12-23T09:34:33.942564866Z", + "end_time": "2025-12-23T09:34:33.942612268Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3961, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/0046136b3539e80c930ab176ea05409b2b9f8874.json b/data/output/0046136b3539e80c930ab176ea05409b2b9f8874.json new file mode 100644 index 0000000..4dbdf5e --- /dev/null +++ b/data/output/0046136b3539e80c930ab176ea05409b2b9f8874.json @@ -0,0 +1,430 @@ +{ + "file_name": "0046136b3539e80c930ab176ea05409b2b9f8874.txt", + "total_words": 865, + "top_n_words": [ + { + "word": "the", + "count": 45 + }, + { + "word": "he", + "count": 29 + }, + { + "word": "a", + "count": 27 + }, + { + "word": "of", + "count": 27 + }, + { + "word": "was", + "count": 24 + }, + { + 
"word": "in", + "count": 20 + }, + { + "word": "sharp", + "count": 20 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "s", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "So .", + "length": 4 + }, + { + "text": "In .", + "length": 4 + }, + { + "text": "'Mr.", + "length": 4 + }, + { + "text": "'Mr.", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "Ever.", + "length": 5 + }, + { + "text": "When .", + "length": 6 + }, + { + "text": "During .", + "length": 8 + }, + { + "text": "9million.", + "length": 9 + }, + { + "text": "Tara Brady .", + "length": 12 + }, + { + "text": "he needed money.", + "length": 16 + }, + { + "text": "and illegal drugs.", + "length": 18 + }, + { + "text": "transporting drugs.", + "length": 19 + }, + { + "text": "Scroll Down to Watch Video .", + "length": 28 + }, + { + "text": "close to him during the 'war'.", + "length": 30 + }, + { + "text": "His attorney filed an 18-page .", + "length": 31 + }, + { + "text": "All but two have since pleaded guilty.", + "length": 38 + }, + { + "text": "Sharp is dreadfully sorry,' Goldberg said.", + "length": 42 + }, + { + "text": "was truly an aberration from a law-abiding life.", + "length": 48 + }, + { + "text": "be appropriate for the Michigan City, Indiana resident.", + "length": 55 + }, + { + "text": "cocaine stashed in five bags with a wholesale value of $2.", + "length": 58 + }, + { + "text": "So how did Leo Sharp become the oldest drug mule in the U.", + "length": 58 + }, + { + "text": "He was arrested in February in an unrelated investigation.", + "length": 58 + }, + { + "text": "A note found in his car led police to Viejo, Sharp's handler.", + "length": 61 + }, + { + "text": "after he was caught with 230lb of a Mexican cartel's cocaine?", + "length": 61 + }, + { + "text": "However, Sharp also said he was threatened when he tried to 
stop .", + "length": 66 + }, + { + "text": "' Sharp was eventually sentenced to three years in federal prison.", + "length": 66 + }, + { + "text": "the trooper asked Sharp if he could search the truck, Sharp refused.", + "length": 68 + }, + { + "text": "a subsequent search of the truck bed, troopers found 104 bricks of .", + "length": 68 + }, + { + "text": "expensive burden on the government if sent to prison, Goldberg said.", + "length": 68 + }, + { + "text": "Sharp made a monumental mistake at a moment of perceived financial .", + "length": 68 + }, + { + "text": "hearing, telling the judge he lost his hearing after a cannon fired .", + "length": 69 + }, + { + "text": "married father-of-three made a highly amusing appearance at the court .", + "length": 71 + }, + { + "text": "the officer requested a backup unit with a dog trained to detect bombs .", + "length": 72 + }, + { + "text": "sentencing memo hoping to persuade a judge that home confinement would .", + "length": 72 + }, + { + "text": "preparation for the sentencing hearing, Sharp told court officials that .", + "length": 73 + }, + { + "text": "weakness, and was exploited and threatened, but his conduct in this case .", + "length": 74 + }, + { + "text": "He was diagnosed with dementia and other health problems and would be an .", + "length": 74 + }, + { + "text": "He was eventually arrested in October 2011 following a routine traffic stop.", + "length": 76 + }, + { + "text": "Joaquin Guzman, also known as El Chapo or Shorty, was arrested in February .", + "length": 76 + }, + { + "text": "' However, he also led another life as one of Sinaloa's most trusted couriers.", + "length": 78 + }, + { + "text": "Four months after Sharpe's arrest, police raided 10 locations making 19 charges.", + "length": 80 + }, + { + "text": "Busted: Leo Sharp was a drug courier and known as 'grandad' to the Sinaloa cartel .", + "length": 83 + }, + { + "text": "' Sharp was charged with conspiracy and possession with intent to 
distribute cocaine.", + "length": 85 + }, + { + "text": "The street price of cocaine per kilogram has also increased from about $30,000 to $43,000.", + "length": 90 + }, + { + "text": "When he was told he would have to take a drug test, he said: 'I've never used a drug in my life.", + "length": 96 + }, + { + "text": "Chapo Guzman, the head of the entire Sinaloa cartel, was one of the most wanted fugitives in the world.", + "length": 103 + }, + { + "text": "Leo Sharp was found guilty of transporting cocaine across America for a notorious Mexican drug cartel .", + "length": 103 + }, + { + "text": "He was the 90-year-old Second World War veteran and one of America's foremost experts of day-lily flowers.", + "length": 106 + }, + { + "text": "Jailed: Leo Sharp is America's oldest drug mule after he was caught with 230lb of a Mexican cartel's cocaine .", + "length": 110 + }, + { + "text": "Despite his arrest and the sentencing of other members of the Sinaloa, experts believe the cartel will continue.", + "length": 112 + }, + { + "text": "'He has a legitimate ID, he’s an older guy, he wouldn’t be pegged as a drug runner and he has no criminal history.", + "length": 118 + }, + { + "text": "One question which hung over the case was whether Sharp was fully aware of what he was doing or whether he was senile.", + "length": 118 + }, + { + "text": "Couriers were normally paid $1,000 per kilo so Sharp would have made $104,000 on the trip when he was arrested in 2011.", + "length": 119 + }, + { + "text": "Despite never divulging his relationship with the cartel or how he managed to evade detection he did help investigators.", + "length": 120 + }, + { + "text": "' The government said it had evidence that Sharp was transporting marijuana and cocaine from the West Coast back in 2000.", + "length": 121 + }, + { + "text": "Sharp, from Michigan City, Indiana, carried thousands of pounds of cocaine for the Sinaloa cartel over a period of 10 years.", + "length": 124 + }, + { + "text": 
"As the animal walked around the rear of Sharp's truck, it alerted to the possible presence of narcotics, the complaint said.", + "length": 124 + }, + { + "text": "In a feature for the New York Times Sam Donick tells the story of Tata, or grandfather, who has gone down as a Detroit urban legend.", + "length": 132 + }, + { + "text": "'He traveled with an entourage of Mexican farmhands to help with the hundreds of flowers he would give away, making his admirers swoon.", + "length": 135 + }, + { + "text": "He is a World War II veteran who fought in Italy and was awarded the Bronze Star, the fourth-highest honor, for his service, Goldberg said.", + "length": 139 + }, + { + "text": "' Florist Sharp attended flower conventions across the country 'dressed in either an all-white leisure suit or an all-black one', Dolnick writes.", + "length": 145 + }, + { + "text": "'He is a colorful, self-made, charitable man who has worked hard throughout this entire admirable, extraordinary, and long life,' Darryl Goldberg wrote.", + "length": 152 + }, + { + "text": "Special Agent Jeremy Fitch, one of the DEA agents who worked on the case which eventually caught Sharp, said: 'Leo is the perfect courier for the cartel.", + "length": 153 + }, + { + "text": "In 2010 alone, the great-grandfather is thought to have made more than $1million serving as a conduit for what was believed to be the biggest cocaine operation authorities in Detroit had ever seen.", + "length": 197 + }, + { + "text": "Despite facing 20 years in prison, prosecutors recommended he serve five because of his ageSharpe's lawyer also described him as a Second World War veteran who deserved mercy saying: 'That's not how we honour our heroes.", + "length": 220 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5580065846443176 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:34.442679621Z", + "first_section_created": "2025-12-23T09:34:34.443073137Z", + 
"last_section_published": "2025-12-23T09:34:34.443270845Z", + "all_results_received": "2025-12-23T09:34:34.510534555Z", + "output_generated": "2025-12-23T09:34:34.510762665Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:34.443073137Z", + "publish_time": "2025-12-23T09:34:34.443270845Z", + "first_worker_start": "2025-12-23T09:34:34.443851968Z", + "last_worker_end": "2025-12-23T09:34:34.509636Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:34.443864669Z", + "start_time": "2025-12-23T09:34:34.443918071Z", + "end_time": "2025-12-23T09:34:34.444006274Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:34.444064Z", + "start_time": "2025-12-23T09:34:34.444214Z", + "end_time": "2025-12-23T09:34:34.509636Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:34.443790166Z", + "start_time": "2025-12-23T09:34:34.443851968Z", + "end_time": "2025-12-23T09:34:34.443973173Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:34.443867369Z", + "start_time": "2025-12-23T09:34:34.443934772Z", + "end_time": "2025-12-23T09:34:34.443980773Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + 
"total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4972, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/004636d2b06ffc786148189e4d8e5e950bd46e8f.json b/data/output/004636d2b06ffc786148189e4d8e5e950bd46e8f.json new file mode 100644 index 0000000..f6892de --- /dev/null +++ b/data/output/004636d2b06ffc786148189e4d8e5e950bd46e8f.json @@ -0,0 +1,242 @@ +{ + "file_name": "004636d2b06ffc786148189e4d8e5e950bd46e8f.txt", + "total_words": 445, + "top_n_words": [ + { + "word": "the", + "count": 23 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "ipad", + "count": 12 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "t", + "count": 8 + }, + { + "word": "at", + "count": 7 + }, + { + "word": "s", + "count": 7 + }, + { + "word": "verizon", + "count": 7 + }, + { + "word": "3g", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "com.", + "length": 4 + }, + { + "text": "Click here!", + "length": 11 + }, + { + "text": "Copyright 2010 Wired.", + "length": 21 + }, + { + "text": "com's Charlie Sorrel.", + "length": 21 + }, + { + "text": "WIRED: With iPad, Apple still has fatal attraction for AT\u0026T .", + "length": 61 + }, + { + "text": "Subscribe to 
WIRED magazine for less than $1 an issue and get a FREE GIFT!", + "length": 74 + }, + { + "text": "If you don't plan to be on the road a lot, there's still the Wi-Fi option.", + "length": 74 + }, + { + "text": "Whether Apple hammers out sales agreements with Verizon or Sprint remains to be seen.", + "length": 85 + }, + { + "text": "That's up from the two versions Apple currently offers: UMTS plus Wi-Fi, and Wi-Fi only.", + "length": 88 + }, + { + "text": "Recent rumors suggestion that the iPad 2 will hit stores April 2011, one year after the original iPad's release.", + "length": 112 + }, + { + "text": "Currently the 3G iPad ships with a MicroSIM card slot, and in the United States, the only carrier that uses MicroSIM is AT\u0026T.", + "length": 125 + }, + { + "text": "Persistent rumors -- so far unsubstantiated -- have also pointed to a Verizon-compatible iPhone to be released in early 2011.", + "length": 125 + }, + { + "text": "So if this rumor is true, it means that when the iPad 2 ships, you'll have to pick a 3G model based on your carrier preference.", + "length": 127 + }, + { + "text": "The current 3G model of the iPad is not tied to a contract: Customers pay a flat monthly rate for data and can opt out whenever they please.", + "length": 140 + }, + { + "text": "Some third-party protective cases for a purported \"iPad 23 have been cropping up in Asia, hinting at the possibility of a bigger speaker and a rear-facing camera.", + "length": 162 + }, + { + "text": "To explicate the alphabet soup, UMTS is the standard used by major 3G carriers such as AT\u0026T and T-Mobile, while CDMA is compatible with Verizon and Sprint networks.", + "length": 164 + }, + { + "text": "The iPad 2 will support three different wireless configurations: UMTS, CDMA and Wi-Fi only, according to \"industry sources quoted by DigiTimes\" citing component makers.", + "length": 168 + }, + { + "text": "Support for both major wireless standards in the United States will make the iPad 2 
available to a much larger potential audience, whereas before it was only available in the states from AT\u0026T.", + "length": 192 + }, + { + "text": "(WIRED) -- Apple's loose-lipped overseas partners are exchanging whispers about the next-generation iPad, claiming it will come in three different versions, one of which would work with Verizon's network.", + "length": 205 + }, + { + "text": "Customers who want to connect to non-AT\u0026T 3G networks must either buy an external wireless hotspot device such as the Verizon MiFi (Verizon already sells a MiFi plus iPad package) or trim a standard SIM card down to MicroSIM size, like Wired.", + "length": 242 + }, + { + "text": "If Verizon gets the iPhone and the iPad, it would greatly expand Apple's potential market, and would also likely deal a severe blow to AT\u0026T, which has been roundly criticized for the inability of its 3G network to keep up with iPhone-induced demand.", + "length": 249 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.6209676861763 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:34.944363762Z", + "first_section_created": "2025-12-23T09:34:34.946266838Z", + "last_section_published": "2025-12-23T09:34:34.946404544Z", + "all_results_received": "2025-12-23T09:34:35.01386927Z", + "output_generated": "2025-12-23T09:34:35.014010775Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:34.946266838Z", + "publish_time": "2025-12-23T09:34:34.946404544Z", + "first_worker_start": "2025-12-23T09:34:34.946980667Z", + "last_worker_end": "2025-12-23T09:34:35.012915Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:34.946979567Z", + "start_time": "2025-12-23T09:34:34.94704677Z", + "end_time": 
"2025-12-23T09:34:34.947101372Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:34.947189Z", + "start_time": "2025-12-23T09:34:34.947337Z", + "end_time": "2025-12-23T09:34:35.012915Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:34.946958466Z", + "start_time": "2025-12-23T09:34:34.947025969Z", + "end_time": "2025-12-23T09:34:34.947106072Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:34.946918165Z", + "start_time": "2025-12-23T09:34:34.946980667Z", + "end_time": "2025-12-23T09:34:34.947008768Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2468, + 
"slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/00464528a306e3c679fdaf61f60c42ba6f7db6ba.json b/data/output/00464528a306e3c679fdaf61f60c42ba6f7db6ba.json new file mode 100644 index 0000000..2b1bc98 --- /dev/null +++ b/data/output/00464528a306e3c679fdaf61f60c42ba6f7db6ba.json @@ -0,0 +1,290 @@ +{ + "file_name": "00464528a306e3c679fdaf61f60c42ba6f7db6ba.txt", + "total_words": 447, + "top_n_words": [ + { + "word": "the", + "count": 29 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "group", + "count": 7 + }, + { + "word": "s", + "count": 7 + }, + { + "word": "to", + "count": 7 + }, + { + "word": "nigeria", + "count": 6 + }, + { + "word": "boko", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "m.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "m.", + "length": 2 + }, + { + "text": "Gen.", + "length": 4 + }, + { + "text": "fear Boko Haram?", + "length": 16 + }, + { + "text": "Opinion: Should U.", + "length": 18 + }, + { + "text": "Wednesday (12:30 p.", + "length": 19 + }, + { + "text": "Who are the world's 10 most dangerous terrorists?", + "length": 49 + }, + { + "text": "CNN's Nana Karikari-apau contributed to this report.", + "length": 52 + }, + { + "text": "for several hours,\" according to the Joint Task Force.", + "length": 54 + }, + { + "text": "Suspected members of the extremist group around 5:30 p.", + "length": 55 + }, + { + "text": "The military \"remains on the offensive,\" according to Brig.", + "length": 59 + }, + { + "text": "The military did not provide any information on its casualties.", + "length": 63 + }, + { + "text": "Also in northern Nigeria, Damaturu is the capital of Yobe state.", + "length": 64 + }, + { + "text": "The group released a video boasting that it was growing stronger.", + "length": 65 + }, + { 
+ "text": "This wasn't the only clash between Boko Haram and Nigerian troops of late.", + "length": 74 + }, + { + "text": "Ibrahim Attahiru, who said the operation started Thursday and continued into the next day.", + "length": 90 + }, + { + "text": "CNN's Vlad Duthiers reported from Nigeria, CNN's Greg Botelho wrote this story from Atlanta.", + "length": 92 + }, + { + "text": "In August, its militants allegedly went into a mosque in Borno state and killed 44 worshipers.", + "length": 94 + }, + { + "text": "\"Any credible information should be passed promptly to security agencies for necessary action.", + "length": 94 + }, + { + "text": "But the group did not stay down for long, and has remained an active and violent force in Nigeria.", + "length": 98 + }, + { + "text": "ET) attacked a military checkpoint in Damaturu, Nigeria's Joint Task Force reported in a statement.", + "length": 99 + }, + { + "text": "By the time that fighting was over, 21 suspected Boko Haram fighters were dead, the government group reported.", + "length": 110 + }, + { + "text": "Hundreds of its members, including its leader Mohammed Yusuf, died in July 2009 clashes with government forces.", + "length": 111 + }, + { + "text": "National Counterterrorism Center, including killing and kidnapping Westerners, and bombing schools and churches.", + "length": 112 + }, + { + "text": "Special operations troops responded, waging \"a fierce encounter with the terrorists in various parts of Damaturu ...", + "length": 116 + }, + { + "text": "The group has attacked various targets in the West African nation since its formation in the late 1990s, according to the U.", + "length": 124 + }, + { + "text": "Three vehicles were recovered, as were assault rifles, a rocket-propelled grenade, improvised explosive devices and 709 rounds of ammunition.", + "length": 141 + }, + { + "text": "Boko Haram, which means \"Western education is sacrilege\" in the Hausa-Fulani language, seeks to impose a strict version of Sharia 
law across northeastern Nigeria, if not the entire country.", + "length": 189 + }, + { + "text": "\"Law abiding citizens are enjoined to remain calm as the 3 Division Special Operation Battalion is on top of the situation,\" the Joint Task Force said, noting a 24-hour curfew was imposed throughout the state.", + "length": 209 + }, + { + "text": "Abuja, Nigeria (CNN) -- More than 70 members of the Islamist extremist group Boko Haram have been killed during a Nigerian military operation in the northeastern state of Borno, an Army spokesman told CNN on Friday.", + "length": 215 + }, + { + "text": "\" Last May, President Goodluck Jonathan put three states in the region under a state of emergency, giving Nigerian forces wide latitude in fighting the group, which human rights organizations say has killed more than 3,000 people since 2009.", + "length": 241 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8081022500991821 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:35.447146575Z", + "first_section_created": "2025-12-23T09:34:35.44752729Z", + "last_section_published": "2025-12-23T09:34:35.447705897Z", + "all_results_received": "2025-12-23T09:34:35.511298967Z", + "output_generated": "2025-12-23T09:34:35.511468773Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:35.44752729Z", + "publish_time": "2025-12-23T09:34:35.447705897Z", + "first_worker_start": "2025-12-23T09:34:35.448200717Z", + "last_worker_end": "2025-12-23T09:34:35.510368Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:35.448214918Z", + "start_time": "2025-12-23T09:34:35.44827832Z", + "end_time": "2025-12-23T09:34:35.448324622Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + 
}, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:35.44843Z", + "start_time": "2025-12-23T09:34:35.448579Z", + "end_time": "2025-12-23T09:34:35.510368Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:35.448238119Z", + "start_time": "2025-12-23T09:34:35.448323522Z", + "end_time": "2025-12-23T09:34:35.448381125Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:35.448146315Z", + "start_time": "2025-12-23T09:34:35.448200717Z", + "end_time": "2025-12-23T09:34:35.448219318Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2762, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git 
a/data/output/0046814224c867bf3a2e773c86845a4375184478.json b/data/output/0046814224c867bf3a2e773c86845a4375184478.json new file mode 100644 index 0000000..900848d --- /dev/null +++ b/data/output/0046814224c867bf3a2e773c86845a4375184478.json @@ -0,0 +1,302 @@ +{ + "file_name": "0046814224c867bf3a2e773c86845a4375184478.txt", + "total_words": 717, + "top_n_words": [ + { + "word": "the", + "count": 42 + }, + { + "word": "in", + "count": 24 + }, + { + "word": "a", + "count": 20 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "of", + "count": 17 + }, + { + "word": "chisholm", + "count": 12 + }, + { + "word": "couple", + "count": 11 + }, + { + "word": "for", + "count": 11 + }, + { + "word": "welfare", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "6 acres.", + "length": 8 + }, + { + "text": "2million yacht in Florida.", + "length": 26 + }, + { + "text": "2million yacht in Florida .", + "length": 27 + }, + { + "text": "The couple not only owned a $1.", + "length": 31 + }, + { + "text": "'We will prosecute this case with all the capacity we have.", + "length": 59 + }, + { + "text": "Calls to the family home by MailOnline went unanswered today.", + "length": 61 + }, + { + "text": "2million yacht but also a $30,000 Lexus and beach property in Florida .", + "length": 71 + }, + { + "text": "The couple are still at large after a search was launched by law enforcement last month.", + "length": 88 + }, + { + "text": "Grand lakeside home: The couple moved to a lakefront property on Lake Minnetonka in 2008 .", + "length": 90 + }, + { + "text": "In April 2007, the Chisholms came back to Minnesota and filled out forms for more benefits.", + "length": 91 + }, + { + "text": "They immediately moved to another Deephaven home on Lake Minnetonka with stunning waterfront views.", + "length": 99 + }, + { + "text": "Noble roots: According to their company website, the couple trace their roots to Clan Chisholm in Struy .", + 
"length": 105 + }, + { + "text": "Chisholm is listed as the president and CEO of TCN Network, a satellite TV company serving the Carribbean.", + "length": 106 + }, + { + "text": "The couple were living with Lady Chisholm's mother in Minneapolis when they first applied for welfare in 2005.", + "length": 110 + }, + { + "text": "The home, built in 1909, is believed to have six bedrooms, five bathrooms and stretch to 5,800 sq ft set on 1.", + "length": 110 + }, + { + "text": "Lady Chisholm is alleged to have claimed for state prenatal care despite having millions of dollars in the bank .", + "length": 113 + }, + { + "text": "More than $1 million dollars flowed through accounts he controlled as part of that company, the complaint states.", + "length": 113 + }, + { + "text": "In the welfare applications, the couple allegedly lied about where they were living and who they were living with.", + "length": 114 + }, + { + "text": "In March 2008, they moved into a luxury home in Deephaven with Andrea’s grandparents Eloise and Francis Heidecker.", + "length": 116 + }, + { + "text": "Lady Chisholm had her prenatal care for the couple's son in 2006 paid for by the state while claiming to be desitute.", + "length": 117 + }, + { + "text": "The Chisholms also had $3 million in bank accounts which they failed to declare, according to Hennepin County Attorney's office.", + "length": 128 + }, + { + "text": "Francis died in February 2009 at age 94 and by September of that year, Eloise Heidecker and the Chisholms were evicted from the home.", + "length": 133 + }, + { + "text": "' Hennepin County’s Human Services and Public Health Department terminated all welfare benefits to the Chisholms at the end of March 2012.", + "length": 140 + }, + { + "text": "A couple claiming to be wealthy Scottish aristocrats are alleged to have raked in $165,000 over years of welfare fraud while living on their $1.", + "length": 144 + }, + { + "text": "Colin Chisholm III, claims he is from a long line of 
Scottish aristocracy and is a wealthy broadcasting executive, according to court filings today.", + "length": 148 + }, + { + "text": "The couple were receiving public assistance for their medical care and food stamps in Minnesota for years while also allegedly collecting welfare in Florida.", + "length": 157 + }, + { + "text": "According to the criminal complaint filed in February, the Chisholms applied for more than a dozen forms of medical assistance, welfare payments and food stamps.", + "length": 161 + }, + { + "text": "Colin Chisholm did not declare that he ran a business nor Lady Chisholm that she had a dog kennel which breeds championship-worthy Cavalier King Charles Spaniels.", + "length": 162 + }, + { + "text": "None of these details were declaired on welfare forms - nor the fact Andrea Chisholm had power of attorney and controlled the finances of her elderly grandmother.", + "length": 162 + }, + { + "text": "However for the next two years, they resided on their Florida yacht, and later at a beach property in the state, none of which was ever declared, investigators claim.", + "length": 166 + }, + { + "text": "According to the website for their company Strathglass Kennel, the couple trace their roots to the Clan Chisholm from the small village of Struy in the Scottish Highlands.", + "length": 171 + }, + { + "text": "'Scottish aristocrats' Colin Chisholm the Third, 62, and his wife Lady Andrea, 54, have been charged with $165,000 in welfare fraud while it is alleged they were living on a $1.", + "length": 177 + }, + { + "text": "Along with a million-dollar yacht called the Andrea Aras, the couple have a grand, six-bedroom lakeside home, a beach property in Florida and a $30,000 Lexus, investigators found.", + "length": 179 + }, + { + "text": "Prosecutors will ask that the judge be allowed to impose a longer sentence than the Minnesota Sentencing Guidelines call for because the Chisholms committed a major economic offense.", + "length": 182 + }, + { + 
"text": "Hennepin County Attorney Mike Freeman said on Friday: 'It is truly outrageous when persons of considerable means steal from the government and all of us taxpayers through abusing the social welfare system.", + "length": 205 + }, + { + "text": "Colin Chisholm III, 62, and his wife Lady Andrea, 54, of Deephaven, Minnesota, have been charged with wrongfully obtaining thousands of dollars between 2005 and 2012 from various government assistance programs.", + "length": 210 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6657701730728149 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:35.947863304Z", + "first_section_created": "2025-12-23T09:34:35.948173117Z", + "last_section_published": "2025-12-23T09:34:35.948362225Z", + "all_results_received": "2025-12-23T09:34:36.014134682Z", + "output_generated": "2025-12-23T09:34:36.01432679Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:35.948173117Z", + "publish_time": "2025-12-23T09:34:35.948362225Z", + "first_worker_start": "2025-12-23T09:34:35.949082354Z", + "last_worker_end": "2025-12-23T09:34:36.013143Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:35.949006251Z", + "start_time": "2025-12-23T09:34:35.949082354Z", + "end_time": "2025-12-23T09:34:35.949179358Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:35.9493Z", + "start_time": "2025-12-23T09:34:35.949484Z", + "end_time": "2025-12-23T09:34:36.013143Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:35.949094454Z", + "start_time": 
"2025-12-23T09:34:35.949167957Z", + "end_time": "2025-12-23T09:34:35.949264661Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:35.949031252Z", + "start_time": "2025-12-23T09:34:35.949100854Z", + "end_time": "2025-12-23T09:34:35.949140556Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4310, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/0046a356cc90e393be41135da101f83da40ee6a3.json b/data/output/0046a356cc90e393be41135da101f83da40ee6a3.json new file mode 100644 index 0000000..34cd56e --- /dev/null +++ b/data/output/0046a356cc90e393be41135da101f83da40ee6a3.json @@ -0,0 +1,210 @@ +{ + "file_name": "0046a356cc90e393be41135da101f83da40ee6a3.txt", + "total_words": 186, + "top_n_words": [ + { + "word": 
"the", + "count": 15 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "s", + "count": 7 + }, + { + "word": "a", + "count": 5 + }, + { + "word": "police", + "count": 5 + }, + { + "word": "cnn", + "count": 4 + }, + { + "word": "is", + "count": 4 + }, + { + "word": "istanbul", + "count": 4 + }, + { + "word": "at", + "count": 3 + }, + { + "word": "in", + "count": 3 + } + ], + "sorted_sentences": [ + { + "text": "m.", + "length": 2 + }, + { + "text": ", Istanbul Gov.", + "length": 15 + }, + { + "text": "Vasip Sahin told reporters.", + "length": 27 + }, + { + "text": "Police cordoned off the area.", + "length": 29 + }, + { + "text": "CNN's Hande Atay contributed to this report.", + "length": 44 + }, + { + "text": "Sahin did not mention a motive for the attack.", + "length": 46 + }, + { + "text": "CNN's Gul Tuysuz reported and wrote from Istanbul, and CNN's Jason Hanna wrote in Atlanta.", + "length": 90 + }, + { + "text": "The attacker's identity is unknown and the incident is being investigated, the governor told reporters.", + "length": 103 + }, + { + "text": "Later Tuesday, Turkey's semi-official Anadolu news agency reported that one of the officers died of his wounds at a hospital.", + "length": 125 + }, + { + "text": "The bomber, speaking English, entered the police station saying she lost her wallet, and the explosion happened at about 5:20 p.", + "length": 128 + }, + { + "text": "Sahin initially said that the blast, besides killing the bomber, critically injured one police officer and slightly wounded another.", + "length": 132 + }, + { + "text": "The attack happened in the section of Turkey's largest city that is home to landmarks such as the Hagia Sophia and the Blue Mosque, and is heavily trafficked by tourists.", + "length": 170 + }, + { + "text": "Istanbul (CNN)A woman carried out a suicide bombing at a police station in Istanbul's historic Sultanahmet district Tuesday evening, killing one police officer and injuring another, officials said.", + 
"length": 197 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8777233362197876 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:36.448695639Z", + "first_section_created": "2025-12-23T09:34:36.44896965Z", + "last_section_published": "2025-12-23T09:34:36.449130156Z", + "all_results_received": "2025-12-23T09:34:36.51356126Z", + "output_generated": "2025-12-23T09:34:36.513685365Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:36.44896965Z", + "publish_time": "2025-12-23T09:34:36.449130156Z", + "first_worker_start": "2025-12-23T09:34:36.449700679Z", + "last_worker_end": "2025-12-23T09:34:36.512465Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:36.449675978Z", + "start_time": "2025-12-23T09:34:36.449734781Z", + "end_time": "2025-12-23T09:34:36.449761282Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:36.44989Z", + "start_time": "2025-12-23T09:34:36.450041Z", + "end_time": "2025-12-23T09:34:36.512465Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:36.449681479Z", + "start_time": "2025-12-23T09:34:36.449743781Z", + "end_time": "2025-12-23T09:34:36.449788483Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:36.449656378Z", + "start_time": "2025-12-23T09:34:36.449700679Z", + "end_time": "2025-12-23T09:34:36.44971328Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + 
"total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1118, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/0046c1bc6886e3cdd5c4a7e4603bd537c3ced3dd.json b/data/output/0046c1bc6886e3cdd5c4a7e4603bd537c3ced3dd.json new file mode 100644 index 0000000..de70150 --- /dev/null +++ b/data/output/0046c1bc6886e3cdd5c4a7e4603bd537c3ced3dd.json @@ -0,0 +1,298 @@ +{ + "file_name": "0046c1bc6886e3cdd5c4a7e4603bd537c3ced3dd.txt", + "total_words": 711, + "top_n_words": [ + { + "word": "he", + "count": 37 + }, + { + "word": "was", + "count": 21 + }, + { + "word": "the", + "count": 20 + }, + { + "word": "it", + "count": 19 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "that", + "count": 17 + }, + { + "word": "his", + "count": 16 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "for", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "He can’t stop smiling.", + "length": 24 + }, + { + "text": 
"‘Daniel is thrilled with his new hand.", + "length": 40 + }, + { + "text": "He is able to pick his toys up for the first time too.", + "length": 54 + }, + { + "text": "It has given him so much more independence and confidence.", + "length": 58 + }, + { + "text": "She said: ‘It was a massive shock to me when I found out.", + "length": 59 + }, + { + "text": "It’s something that I never thought I’d be able to see.", + "length": 59 + }, + { + "text": "'He is so excited for Christmas now he’s got his new hand.", + "length": 60 + }, + { + "text": "There was only one present Daniel Tennant wanted for Christmas this year.", + "length": 73 + }, + { + "text": "‘He started school in September and he just wanted to be like the other children.", + "length": 83 + }, + { + "text": "But it was a massive shock to me as I hadn’t imagined anything would be wrong with him.", + "length": 89 + }, + { + "text": "‘He never needed anything else, he just adapted to doing things without any proper hands.", + "length": 91 + }, + { + "text": "‘Then the midwife told me that he was perfect, but there were some problems with his arms.", + "length": 92 + }, + { + "text": "’ It wasn’t until he was born that Stephanie realised there was anything wrong with Daniel.", + "length": 95 + }, + { + "text": "'It was wonderful to see him being able to pull Christmas crackers for the first time this year.", + "length": 96 + }, + { + "text": "'He was thrilled when he tried on his new hand for the first time, and could pick things up properly.", + "length": 101 + }, + { + "text": "I could see that his left arm stopped above the elbow and he only had a very small arm on his right side.", + "length": 105 + }, + { + "text": "It means that he can do so much more now that he can grip things like his presents and pull crackers too.", + "length": 105 + }, + { + "text": "‘There was no explanation for why it had happened in the womb, I was told it was just one of those things.", + "length": 108 + }, + { + "text": 
"Stephanie who lives in Bathgate, West Lothian, said: ‘We are going to have a wonderful Christmas this year.", + "length": 109 + }, + { + "text": "He was fitted for the prosthetic arm and it was finally ready earlier this month - just in time for Christmas.", + "length": 110 + }, + { + "text": "He learnt to crawl and walk and he would also pick things up like a ball by balancing it between his two stumps.", + "length": 112 + }, + { + "text": "The brave tot has just been given a new hand for Christmas - after being born without fully formed arms or hands.", + "length": 113 + }, + { + "text": "But then a few months ago he started to say that he really wanted a hand with five fingers, just like everyone else.", + "length": 116 + }, + { + "text": "It means that little Daniel, five, can now open his Christmas presents and pull crackers with his mother Stephanie, 29.", + "length": 119 + }, + { + "text": "‘He has always managed really well, but now he can feel more like the other children now that he’s got his new hand.", + "length": 120 + }, + { + "text": "’ Daniel had a prosthetic arm fitted at six months old, to help him balance when he crawled, but he preferred to try without it.", + "length": 130 + }, + { + "text": "Five-year-old Daniel Tennant is delighted with new hand which means he can now open his own Christmas presents and pull crackers .", + "length": 130 + }, + { + "text": "Stephanie, who is single, said: ‘He never used his prosthetic arm that he had when he was a baby, and he managed fine without it.", + "length": 131 + }, + { + "text": "The youngster was born without fully formed arms or legs, but preferred to live without the prosthetic arm he had fitted at six months .", + "length": 136 + }, + { + "text": "My mum and aunty were in the delivery room at the hospital with me and I saw the shock on my aunt’s face when she saw him for the first time.", + "length": 143 + }, + { + "text": "’ So Stephanie took him to the Astley Ainslie Hospital in Edinburgh and 
doctors made him a prosthetic hand with fingers that opened and closed.", + "length": 145 + }, + { + "text": "Doctors at Edinburgh's Astley Ainslie Hospital made Daniel a new hand with fingers that open and close and it was fitted last month, just in time for Christmas .", + "length": 161 + }, + { + "text": "Nothing had been picked up during her pregnancy scans and it was only when he was born that she was told that there was a problem with the development of his arms.", + "length": 163 + }, + { + "text": "Stephanie, who is supported by REACH, a charity that helps children with upper limb deformities, said: ‘It really is the best present that Daniel could ever have wished for.", + "length": 175 + }, + { + "text": "Daniel's mother Stephanie said: 'It really is the best present that Daniel could ever have wished for' ‘When he was a baby he just played with his plastic arm just like he would do a toy, then as he grew into a toddler he ignored it.", + "length": 235 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.4308854043483734 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:36.949893988Z", + "first_section_created": "2025-12-23T09:34:36.951310245Z", + "last_section_published": "2025-12-23T09:34:36.951504953Z", + "all_results_received": "2025-12-23T09:34:37.010623242Z", + "output_generated": "2025-12-23T09:34:37.010748847Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:36.951310245Z", + "publish_time": "2025-12-23T09:34:36.951504953Z", + "first_worker_start": "2025-12-23T09:34:36.952052375Z", + "last_worker_end": "2025-12-23T09:34:37.009811Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:36.952060476Z", + "start_time": 
"2025-12-23T09:34:36.952136579Z", + "end_time": "2025-12-23T09:34:36.952228282Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:36.952394Z", + "start_time": "2025-12-23T09:34:36.952551Z", + "end_time": "2025-12-23T09:34:37.009811Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:36.952136979Z", + "start_time": "2025-12-23T09:34:36.952215582Z", + "end_time": "2025-12-23T09:34:36.952530795Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:36.951984572Z", + "start_time": "2025-12-23T09:34:36.952052375Z", + "end_time": "2025-12-23T09:34:36.952092677Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, 
+ "average_section_size": 3774, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/0046d124893430e7c4071585007d79aa0191c067.json b/data/output/0046d124893430e7c4071585007d79aa0191c067.json new file mode 100644 index 0000000..e0e3231 --- /dev/null +++ b/data/output/0046d124893430e7c4071585007d79aa0191c067.json @@ -0,0 +1,564 @@ +{ + "file_name": "0046d124893430e7c4071585007d79aa0191c067.txt", + "total_words": 1161, + "top_n_words": [ + { + "word": "the", + "count": 41 + }, + { + "word": "of", + "count": 35 + }, + { + "word": "to", + "count": 34 + }, + { + "word": "a", + "count": 26 + }, + { + "word": "erdogan", + "count": 22 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "in", + "count": 20 + }, + { + "word": "that", + "count": 20 + }, + { + "word": "have", + "count": 18 + }, + { + "word": "he", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": ".", + "length": 1 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "vote.", + "length": 5 + }, + { + "text": "Turkey.", + "length": 7 + }, + { + "text": "'They .", + "length": 7 + }, + { + "text": "Turks .", + "length": 7 + }, + { + "text": "Square.", + "length": 7 + }, + { + "text": "We have .", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Erdogan's .", + "length": 11 + }, + { + "text": "' Erdogan .", + "length": 11 + }, + { + "text": "Protesters .", + "length": 12 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Jill Reilly .", + "length": 13 + }, + { + "text": "It's not true.", + "length": 14 + }, + { + "text": "'Together we are .", + "length": 18 + }, + { + "text": "landslide elections.", + "length": 20 + }, + { + "text": "03:31 EST, 7 June 2013 .", + "length": 24 + }, + { + "text": "04:37 EST, 7 June 2013 .", + "length": 24 + }, + { + "text": "endeavored to break hearts.", + "length": 27 + }, + { + "text": "We are in favor of mending hearts.", + 
"length": 34 + }, + { + "text": "' But he soon became more combative.", + "length": 36 + }, + { + "text": "Erdogan at times was almost drowned .", + "length": 37 + }, + { + "text": "manner - charges he vehemently denies.", + "length": 38 + }, + { + "text": "Tens of thousands of protesters have .", + "length": 38 + }, + { + "text": "' 'Turkey is absolutely at a crossroads.", + "length": 40 + }, + { + "text": "One person is on life support in Ankara.", + "length": 40 + }, + { + "text": "'Istanbul is here, where are the looters?", + "length": 41 + }, + { + "text": "But we cannot applaud brutality,' he said.", + "length": 42 + }, + { + "text": "used, and promising it would be investigated.", + "length": 45 + }, + { + "text": "'Let us go, let us smash them,' they shouted.", + "length": 45 + }, + { + "text": "say I am the prime minister of only 50 percent.", + "length": 47 + }, + { + "text": "International Airport in Istanbul this morning .", + "length": 48 + }, + { + "text": "Turkey as a model of democracy anymore,' he said.", + "length": 49 + }, + { + "text": "'God is Great,' they chanted, and soon moved on to .", + "length": 52 + }, + { + "text": "One protester is on life support in a hospital in Ankara.", + "length": 57 + }, + { + "text": "'We have never been for building tension and polarization.", + "length": 58 + }, + { + "text": "Together we are brothers,' he said, adding 'We have never .", + "length": 59 + }, + { + "text": "signal of whether the demonstrations would fizzle or rage on.", + "length": 61 + }, + { + "text": "speech, delivered from atop an open-air bus outside the airport .", + "length": 65 + }, + { + "text": "slogans referring specifically to the protesters in Taksim Square.", + "length": 66 + }, + { + "text": "Welcome: Supporters dance and wave Turkish flags upon his arrival .", + "length": 67 + }, + { + "text": "to what they say is Erdogan's increasingly autocratic and arrogant .", + "length": 68 + }, + { + "text": "while also acknowledging that 
excessive police force might have been .", + "length": 70 + }, + { + "text": "have been awaiting Erdogan's words upon his return, seeing them as a .", + "length": 70 + }, + { + "text": "referring to his election win in 2011, when he took 50 percent of the .", + "length": 71 + }, + { + "text": "'I do not believe his sincerity,' said protester Hazer Berk Buyukturca.", + "length": 71 + }, + { + "text": "terminal, appeared at first to be an attempt to strike a unifying note.", + "length": 71 + }, + { + "text": "out by his supporters, part of the base that has helped him win three .", + "length": 71 + }, + { + "text": "as a small protest against a plan to develop Istanbul's central Taksim .", + "length": 72 + }, + { + "text": "Emne Erdogan (right) are greeted by supporters upon arrival at Ataturk .", + "length": 72 + }, + { + "text": "He added that the flames of dissent had been fanned by other groups too.", + "length": 72 + }, + { + "text": "had initially referred to the protesters as looters and troublemakers, .", + "length": 72 + }, + { + "text": "sparked by the violent police reaction last Friday to what started out .", + "length": 72 + }, + { + "text": "Return: Turkish Prime Minister Recep Tayyip Erdogan (left) and his wife .", + "length": 73 + }, + { + "text": "from all walks of life have occupied the square and its park, objecting .", + "length": 73 + }, + { + "text": "served the whole of the 76 million from the east to the west,' he said, .", + "length": 73 + }, + { + "text": "held demonstrations that have spread to dozens of cities across Turkey, .", + "length": 73 + }, + { + "text": "More than 10,000 others filled a busy street in a middle class area of Ankara.", + "length": 78 + }, + { + "text": "Anger: Anti-government protesters gather in Istanbul's Taksim square last night .", + "length": 81 + }, + { + "text": "Interior Minister Muammer Guler said more than 500 police officers had been injured .", + "length": 85 + }, + { + "text": "'These protests that are 
bordering on illegality must come to an end as of now,' he said.", + "length": 89 + }, + { + "text": "Demonstrations: Anti-government protesters shout slogans and wave Turkish national flags .", + "length": 90 + }, + { + "text": "Nearly 80 protesters were still hospitalized, and almost all detained protesters had been released.", + "length": 99 + }, + { + "text": "Since then, three people have died - two protesters and a policeman - and thousands have been wounded.", + "length": 102 + }, + { + "text": "In a twist, Erdogan implied that bankers were also part of a conspiracy that was fuelling the protests.", + "length": 103 + }, + { + "text": "Strong words: 'These protests that are bordering on illegality must come to an end as of now,' he said .", + "length": 104 + }, + { + "text": "Prepared: Protesters pass bottles of water to each other at Taksim square of Istanbul during a protest .", + "length": 104 + }, + { + "text": "A total of 746 protests had erupted, causing some 70 million Turkish Lira ($37 million) in damages, he said.", + "length": 108 + }, + { + "text": "Thousands of supporters cheered as Turkey's prime minister returned to his troubled country early this morning.", + "length": 111 + }, + { + "text": "Tension: Protesters have accused Recep Tayyip Erdoga of being authoritarian and trying to impose Islamic values .", + "length": 113 + }, + { + "text": "Efforts: Protesters pass bricks to each other as they try to form a barricade near Taksim square of Istanbul during a protest .", + "length": 127 + }, + { + "text": "Those comments don't appear to have swayed many of the thousands of protesters who thronged the square for a sixth day Thursday.", + "length": 128 + }, + { + "text": "Cheered: In the first extensive public show of support since anti-government protests erupted more than 10,000 supporters cheered .", + "length": 131 + }, + { + "text": "Rapturous: Supporters of Turkish Prime Minister Recep Tayyip Erdogan chant upon Erdogan's arrival at Ataturk 
International Airport .", + "length": 132 + }, + { + "text": "So far, 4,300 people have been hurt or sought medical attention for the effects of tear gas during the protests, the Turkish Human Rights Foundation said.", + "length": 154 + }, + { + "text": "That includes attempts to impose what many say are restrictive mores on their personal lives, such as how many children to have or whether to drink alcohol.", + "length": 156 + }, + { + "text": "Despite earlier comments that suggested he could be softening his stand, Erdogan delivered a fiery speech on his return from a four-day trip to North Africa.", + "length": 157 + }, + { + "text": "Over the past week the demonstrations have spread to 78 cities, growing into public venting of what protesters perceive to be Erdogan's increasing arrogance.", + "length": 157 + }, + { + "text": "In his last speech in Tunisia before flying to Istanbul, Erdogan had said that terrorist groups were involved in the protests, saying they had been identified.", + "length": 159 + }, + { + "text": "'Those who call themselves journalists, artists, politicians, have, in a very irresponsible way, opened the way for hatred, discrimination and provocation,' he said.", + "length": 165 + }, + { + "text": "Stance: Despite earlier comments that suggested he could be softening his stand, Erdogan delivered a fiery speech on his return from a four-day trip to North Africa .", + "length": 166 + }, + { + "text": "Recep Tayyip Erdogan told supporters that had thronged to greet him outside Istanbul's international airport that the protests that have swept the country must come to an end.", + "length": 175 + }, + { + "text": "In his earlier comments in Tunisia, Erdogan acknowledged that some Turks were involved in the protests out of environmental concerns, and said he had 'love and respect' for them.", + "length": 178 + }, + { + "text": "Mix: Protesters from all walks of life have occupied the square and its park, objecting to what they say is Erdogan's 
increasingly autocratic and arrogant manner - charges he vehemently denies .", + "length": 194 + }, + { + "text": "Rise: Dozens of Turkish Twitter users accused of inciting protests via the microblogging website were released from detention as the death toll from days of anti-government protests rose to four .", + "length": 196 + }, + { + "text": "Outcry: What started as an outcry against a local development project has snowballed into widespread anger against what critics say is the government's increasingly conservative and authoritarian agenda .", + "length": 204 + }, + { + "text": "Wrath: Erdogan denounced those behind a week of violent demonstrations on, causing a sell-off on the Turkish stock exchange from investors worried that his defiant rhetoric will further inflame public wrath .", + "length": 208 + }, + { + "text": "Turkey's main stock market revealed the fears that Erdogan's comments would do little to defuse the protesters, with the general price index plunging by 8 percent after his comments on concerns that continuing unrest would hit the country's economy.", + "length": 249 + }, + { + "text": "Speaking before Erdogan's return, Koray Caliskan, professor of political science and international relations at Bosporus University, pointed out that the prime minister was maintaining a hard line because 'until now Erdogan had always gained support by increasing the tension in the country.", + "length": 291 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6881006360054016 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:37.452273985Z", + "first_section_created": "2025-12-23T09:34:37.454164961Z", + "last_section_published": "2025-12-23T09:34:37.454466574Z", + "all_results_received": "2025-12-23T09:34:37.559808229Z", + "output_generated": "2025-12-23T09:34:37.560014738Z", + "total_processing_time_ms": 107, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 105, + 
"aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:37.454164961Z", + "publish_time": "2025-12-23T09:34:37.45437137Z", + "first_worker_start": "2025-12-23T09:34:37.454861289Z", + "last_worker_end": "2025-12-23T09:34:37.532846Z", + "total_journey_time_ms": 78, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:37.454984294Z", + "start_time": "2025-12-23T09:34:37.455037197Z", + "end_time": "2025-12-23T09:34:37.455143601Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:37.455115Z", + "start_time": "2025-12-23T09:34:37.455257Z", + "end_time": "2025-12-23T09:34:37.532846Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 77 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:37.454812488Z", + "start_time": "2025-12-23T09:34:37.454889991Z", + "end_time": "2025-12-23T09:34:37.455011196Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:37.454789187Z", + "start_time": "2025-12-23T09:34:37.454861289Z", + "end_time": "2025-12-23T09:34:37.454921092Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:37.454401771Z", + "publish_time": "2025-12-23T09:34:37.454466574Z", + "first_worker_start": "2025-12-23T09:34:37.455030196Z", + "last_worker_end": "2025-12-23T09:34:37.558928Z", + "total_journey_time_ms": 104, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:37.454984794Z", + "start_time": "2025-12-23T09:34:37.455074598Z", + "end_time": "2025-12-23T09:34:37.4551097Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": 
"2025-12-23T09:34:37.455351Z", + "start_time": "2025-12-23T09:34:37.455493Z", + "end_time": "2025-12-23T09:34:37.558928Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 103 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:37.455038897Z", + "start_time": "2025-12-23T09:34:37.455079198Z", + "end_time": "2025-12-23T09:34:37.455154601Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:37.454982294Z", + "start_time": "2025-12-23T09:34:37.455030196Z", + "end_time": "2025-12-23T09:34:37.455050497Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 180, + "min_processing_ms": 77, + "max_processing_ms": 103, + "avg_processing_ms": 90, + "median_processing_ms": 103, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3585, + "slowest_section_id": 1, + "slowest_section_time_ms": 104 + } +} diff --git a/data/output/0046eec400a23bb8545acfe7a8399587e957d5a2.json 
b/data/output/0046eec400a23bb8545acfe7a8399587e957d5a2.json new file mode 100644 index 0000000..c8270ca --- /dev/null +++ b/data/output/0046eec400a23bb8545acfe7a8399587e957d5a2.json @@ -0,0 +1,258 @@ +{ + "file_name": "0046eec400a23bb8545acfe7a8399587e957d5a2.txt", + "total_words": 474, + "top_n_words": [ + { + "word": "the", + "count": 38 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "ms", + "count": 10 + }, + { + "word": "had", + "count": 8 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "a", + "count": 7 + }, + { + "word": "been", + "count": 7 + }, + { + "word": "on", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Video courtesy of Rome Reports .", + "length": 32 + }, + { + "text": "Ms Boggia had been there since the 1970s.", + "length": 41 + }, + { + "text": "It appeared all three Italian nuns had been raped.", + "length": 50 + }, + { + "text": "Ms Pulici had been celebrate her 76th birthday on Monday.", + "length": 57 + }, + { + "text": "The third nun, Ms Boggia, found the pair and raised the alarm.", + "length": 62 + }, + { + "text": "Dedicated: Ms Boggia pictured recently teaching children at the convent's school how to write .", + "length": 95 + }, + { + "text": "Attack: The attacker appears to have broken into the Roman Catholic convent during the night on Sunday .", + "length": 104 + }, + { + "text": "'It is very difficult to know the reason behind the killing, but nothing can justify it,' Father Mario said.", + "length": 108 + }, + { + "text": "Devastated: Colleagues and the Vatican are mourning the loss as police probe claims it was a botched robbery .", + "length": 110 + }, + { + "text": "Investigation: Father Mario Pulcini, abbot of the parish, is in talks with police to help catch the attacker .", + "length": 110 + }, + { + "text": "Grief: The missionaries at the convent 
(pictured) had been due to celebrate Ms Pulici's 76th birthday on Monday .", + "length": 113 + }, + { + "text": "Horrific: Lucia Pulici (left) and Olga Rachietti (right) were raped and decapitated in their convent in Burundi .", + "length": 113 + }, + { + "text": "Murdered: Bernadette Boggia, 79, (left) was also killed after finding her fellow missionaries slain in their room .", + "length": 115 + }, + { + "text": "But early on Monday morning other missionaries heard suspicious noises and rushed to find Ms Boggia also dead in her room.", + "length": 122 + }, + { + "text": "Three elderly Italian nuns were raped and beaten before two were decapitated and another murdered in a convent in Burundi.", + "length": 123 + }, + { + "text": "The Vatican said Pope Francis was 'greatly saddened' by the killings of the women, who had worked at the African convent for years.", + "length": 131 + }, + { + "text": "Evidence showed that two of the Roman Catholic nuns had been raped before they were killed, police spokesman Hermenegilde Harimenshi said.", + "length": 138 + }, + { + "text": "Police said three suspects had been detained for questioning as they probe claims it was a botched robbery at the hands of a mentally unbalanced attacker.", + "length": 154 + }, + { + "text": "The bodies of Bernardetta Boggia, 79, Lucia Pulici, 75, and Olga Raschietti, 82, were found in their dormitory in Kamenge, north of the capital of Bujumbura.", + "length": 157 + }, + { + "text": "The pope offered condolences in the 'tragic death' of the nuns to their families, the local parishioners and the sisters' order, the Xavarian Missionary Sisters of Mary.", + "length": 169 + }, + { + "text": "'Pope Francis has learned with great sadness of the murder of three nuns,' Vatican Secretary of State Cardinal Pietro Parolin said in a telegram sent on the pope's behalf.", + "length": 171 + }, + { + "text": "Ms Pulici and Ms Raschietti had served in Burundi for seven years, after working several years in the 
east of another central African state, the Democratic Republic of Congo.", + "length": 174 + }, + { + "text": "Father Mario Pulicini, who is responsible for the parish in a northern suburb of Bujumbura, said Ms Pulici and Ms Raschietti were found 'partially decapitated' in their dormitory on Sunday.", + "length": 189 + }, + { + "text": "The Catholic diocese in Parma, Italy, said on its website that the death of Pulici and Raschietti appeared to have been 'the tragic outcome of an armed robbery by a mentally unbalanced person'.", + "length": 193 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8092746734619141 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:37.955006296Z", + "first_section_created": "2025-12-23T09:34:37.955286807Z", + "last_section_published": "2025-12-23T09:34:37.955478215Z", + "all_results_received": "2025-12-23T09:34:38.022822636Z", + "output_generated": "2025-12-23T09:34:38.022982542Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:37.955286807Z", + "publish_time": "2025-12-23T09:34:37.955478215Z", + "first_worker_start": "2025-12-23T09:34:37.955980335Z", + "last_worker_end": "2025-12-23T09:34:38.021928Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:37.955947134Z", + "start_time": "2025-12-23T09:34:37.955997336Z", + "end_time": "2025-12-23T09:34:37.956046638Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:37.956112Z", + "start_time": "2025-12-23T09:34:37.956258Z", + "end_time": "2025-12-23T09:34:38.021928Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": 
"2025-12-23T09:34:37.955909033Z", + "start_time": "2025-12-23T09:34:37.955988636Z", + "end_time": "2025-12-23T09:34:37.956070039Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:37.955917033Z", + "start_time": "2025-12-23T09:34:37.955980335Z", + "end_time": "2025-12-23T09:34:37.956003936Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2878, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/004706a261e103c1c0adcc84b1b7cafdbb2d9ecd.json b/data/output/004706a261e103c1c0adcc84b1b7cafdbb2d9ecd.json new file mode 100644 index 0000000..f2b8af2 --- /dev/null +++ b/data/output/004706a261e103c1c0adcc84b1b7cafdbb2d9ecd.json @@ -0,0 +1,384 @@ +{ + "file_name": "004706a261e103c1c0adcc84b1b7cafdbb2d9ecd.txt", + 
"total_words": 1026, + "top_n_words": [ + { + "word": "the", + "count": 32 + }, + { + "word": "and", + "count": 30 + }, + { + "word": "in", + "count": 27 + }, + { + "word": "to", + "count": 24 + }, + { + "word": "he", + "count": 23 + }, + { + "word": "tostee", + "count": 23 + }, + { + "word": "was", + "count": 21 + }, + { + "word": "a", + "count": 18 + }, + { + "word": "his", + "count": 18 + }, + { + "word": "of", + "count": 18 + } + ], + "sorted_sentences": [ + { + "text": "20am.", + "length": 5 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "'This man seemed to have bypassed officers...", + "length": 45 + }, + { + "text": "The accused killer was in protective custody.", + "length": 45 + }, + { + "text": "'My foolish and immature attitude has cost me ...", + "length": 49 + }, + { + "text": "'I am completely ashamed of myself,' he wrote, 'I ...", + "length": 53 + }, + { + "text": "Sorry we are not currently accepting comments on this article.", + "length": 62 + }, + { + "text": "' Tostee has been in jail since he was charged with Ms Wright's murder.", + "length": 71 + }, + { + "text": "mobile dating app Tinder, and other social media sites including Facebook.", + "length": 74 + }, + { + "text": "put the lives of my friends, other road users and yourselves as officers at risk.", + "length": 81 + }, + { + "text": "to deal with my issues in ways that do not involved drowning myself with alcohol.", + "length": 81 + }, + { + "text": "Tostee is accused of pushing Ms Wright off his Avalon Apartments balcony in August .", + "length": 84 + }, + { + "text": "She was killed after falling 14 storeys from a Surfers Paradise apartment building .", + "length": 84 + }, + { + "text": "This comes after another of Tostee's desperate attempts to get out of jail was revealed.", + "length": 88 + }, + { + "text": "'I have disappointed myself and my family who have done their best to raise and support me.", + "length": 91 + }, + { + "text": "Gable Tostee 
is accused of murdering New Zealand bank clerk Warriena Wright (above) in August .", + "length": 95 + }, + { + "text": "My memory, concentration and ability to prepare my defence are all deteriorating over time,' he wrote.", + "length": 102 + }, + { + "text": "Since then I have finally come to accept that I have a problem with binge drinking and I am taking measures ...", + "length": 111 + }, + { + "text": "In newly released documents, Tostee said he also held concerns over other prisoners gaining access to his cell.", + "length": 111 + }, + { + "text": "in getting access to my area despite my telling the officers that I did not want to see him and did not know him.", + "length": 113 + }, + { + "text": "In a handwritten note to police, Tostee apologised for drink driving and admits to 'drowning myself with alcohol'.", + "length": 114 + }, + { + "text": "Accused Gable Tostee (left) walked free last month from Queensland's Arthur Gorrie prison after he was granted bail .", + "length": 117 + }, + { + "text": "Tostee has social anxiety and obsessive compulsive disorder which he said was getting worse because of his time in prison.", + "length": 122 + }, + { + "text": "' In his latest affidavit, Tostee said he was 'struggling' while behind bars and also told of a strange encounter with a fellow prisoner .", + "length": 138 + }, + { + "text": "being stopped that night was the wake-up call I needed and I am thankful that it happened before something potentially much worse happened.", + "length": 139 + }, + { + "text": "Recordings made by Tostee on the night he met Ms Wright are being used by police prosecutors and  to build a case against the 28-year-old.", + "length": 139 + }, + { + "text": "The 28-year-old was granted bail last month under strict conditions, including not drinking alcohol and is required to live with his parents.", + "length": 141 + }, + { + "text": "He also claimed he was 'wittier' when drinking alcohol and admits he has been a 'danger to the public' 
while binge drinking since the age of 17.", + "length": 144 + }, + { + "text": "Tostee left prison on a $200,000 surety put up by his parents on November 18, three months after he was arrested for Ms Wright's alleged murder.", + "length": 144 + }, + { + "text": "The note Tostee penned to help secure his release from prison, apologising for drinking in the early hours of July 27 - weeks before Ms Wright died .", + "length": 149 + }, + { + "text": "Twelve days later, following an alleged drinking session at his Surfers Paradise apartment, Ms Wright - a 26-year-old New Zealand bank clerk - died at 2.", + "length": 153 + }, + { + "text": "In his affidavit, the carpet-layer also told of his strange encounter with a fellow prisoner, who claimed to be a family friend, sneaking his way into his cell.", + "length": 160 + }, + { + "text": "Gable Tostee - the man who is accused of murdering New Zealand woman Warriena Wright - claims he was 'distressed' during his time behind bars in a Queensland prison.", + "length": 165 + }, + { + "text": "Written on November 11, exactly a week before he was granted bail on strict conditions, Tostee's note specifically apologises for drinking in the early hours of July 27.", + "length": 169 + }, + { + "text": "'I accept that the stress of my situation will exacerbate my symptoms but I am very concerned that as the months pass by, I am becoming more and more distressed and unwell.", + "length": 172 + }, + { + "text": "Tostee is accused of being responsible for Ms Wright falling off the 14th floor of an apartment building in Surfers Paradise in August after meeting her on dating app Tinder.", + "length": 174 + }, + { + "text": "The Gold Coast tradesman wrote in an affidavit he was 'struggling' while he was in custody at Wacol's Arthur Gorrie Correctional Centre - in Brisbane's west,The Courier Mail reported.", + "length": 184 + }, + { + "text": "He must live with his parents, Gary and Helene, in Carrara while on bail, not drink alcohol, not 
use dating app Tinder but wants to 'go out' and have his curfew eased tom 10pm to 4am .", + "length": 184 + }, + { + "text": "Bail documents indicate that investigators are pursuing statements from the women and have requested data from both Mr Tostee and Ms Wright's Tinder accounts, reported The Australian.", + "length": 184 + }, + { + "text": "The note was among several court documents filed by Tostee's lawyers to gain freedom on bail while he awaits his murder trial, which is unlikely to proceed before late 2016 to early 2017.", + "length": 187 + }, + { + "text": "'On 9 October 2014 a man gained access to my cell area and told me that he was a \"family friend\" and had \"sorted out special privileges for [me] with the screws [prison guards]\",' Tostee wrote.", + "length": 193 + }, + { + "text": "Less than two weeks before the 28-year-old allegedly murdered Warriena Wright, who fell to her death from his 14th floor balcony, police detected Tostee at 3am on the highway across the NSW border.", + "length": 198 + }, + { + "text": "The US administrators of Tinder are being pushed by Australian authorities to aid their search for more than 50 women from Australia and overseas who were allegedly contacted by Mr Tostee via the app.", + "length": 200 + }, + { + "text": "In an affidavit filed by Tostee in the Queensland Supreme Court, he recalled 'being in a state of panic' during the July police pursuit and that 'I 100% accept I screwed up' and had been 'so reckless'.", + "length": 202 + }, + { + "text": "He was 'travelling at high speed' and when eventually he was stopped, in Queensland, was charged with high range drink driving in an incident Tostee describes in his handwritten note as 'immature and dangerous'.", + "length": 211 + }, + { + "text": "The one page letter, obtained by Daily Mail Australia, was penned at Queensland's Arthur Gorrie Correctional Centre in a scrappy style with occasional spelling errors and includes an admission by Tostee that 'I have a 
problem with binge drinking' and that he has put people's lives at risk.", + "length": 290 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.7136566936969757 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:38.456259347Z", + "first_section_created": "2025-12-23T09:34:38.458215927Z", + "last_section_published": "2025-12-23T09:34:38.458472437Z", + "all_results_received": "2025-12-23T09:34:38.537633835Z", + "output_generated": "2025-12-23T09:34:38.537889145Z", + "total_processing_time_ms": 81, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 79, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:38.458215927Z", + "publish_time": "2025-12-23T09:34:38.458400334Z", + "first_worker_start": "2025-12-23T09:34:38.459178665Z", + "last_worker_end": "2025-12-23T09:34:38.536931Z", + "total_journey_time_ms": 78, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:38.459102962Z", + "start_time": "2025-12-23T09:34:38.459178665Z", + "end_time": "2025-12-23T09:34:38.45929047Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:38.459448Z", + "start_time": "2025-12-23T09:34:38.459574Z", + "end_time": "2025-12-23T09:34:38.536931Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 77 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:38.459139664Z", + "start_time": "2025-12-23T09:34:38.459209567Z", + "end_time": "2025-12-23T09:34:38.459326071Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:38.459402774Z", + "start_time": "2025-12-23T09:34:38.459471377Z", + "end_time": "2025-12-23T09:34:38.459520079Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + } + } + }, + { + 
"section_id": 1, + "creation_time": "2025-12-23T09:34:38.458428135Z", + "publish_time": "2025-12-23T09:34:38.458472437Z", + "first_worker_start": "2025-12-23T09:34:38.459184066Z", + "last_worker_end": "2025-12-23T09:34:38.535126Z", + "total_journey_time_ms": 76, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:38.459233668Z", + "start_time": "2025-12-23T09:34:38.45928127Z", + "end_time": "2025-12-23T09:34:38.45930357Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:38.459412Z", + "start_time": "2025-12-23T09:34:38.459552Z", + "end_time": "2025-12-23T09:34:38.535126Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 75 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:38.459268669Z", + "start_time": "2025-12-23T09:34:38.459313371Z", + "end_time": "2025-12-23T09:34:38.459344872Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:38.459130863Z", + "start_time": "2025-12-23T09:34:38.459184066Z", + "end_time": "2025-12-23T09:34:38.459194266Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 152, + "min_processing_ms": 75, + "max_processing_ms": 77, + "avg_processing_ms": 76, + "median_processing_ms": 77, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 
0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2963, + "slowest_section_id": 0, + "slowest_section_time_ms": 78 + } +} diff --git a/data/output/00476d1a5325f5a420d92c9bce298b398a2f9cb3.json b/data/output/00476d1a5325f5a420d92c9bce298b398a2f9cb3.json new file mode 100644 index 0000000..fad06db --- /dev/null +++ b/data/output/00476d1a5325f5a420d92c9bce298b398a2f9cb3.json @@ -0,0 +1,306 @@ +{ + "file_name": "00476d1a5325f5a420d92c9bce298b398a2f9cb3.txt", + "total_words": 411, + "top_n_words": [ + { + "word": "to", + "count": 17 + }, + { + "word": "and", + "count": 16 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "holiday", + "count": 9 + }, + { + "word": "the", + "count": 9 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "as", + "count": 7 + }, + { + "word": "with", + "count": 7 + }, + { + "word": "for", + "count": 6 + }, + { + "word": "in", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "2.", + "length": 2 + }, + { + "text": "4.", + "length": 2 + }, + { + "text": "3.", + "length": 2 + }, + { + "text": "3.", + "length": 2 + }, + { + "text": "4.", + "length": 2 + }, + { + "text": "2.", + "length": 2 + }, + { + "text": "1.", + "length": 2 + }, + { + "text": "1.", + "length": 2 + }, + { + "text": "5.", + "length": 2 + }, + { + "text": "5.", + "length": 2 + }, + { + "text": "Barbados .", + "length": 10 + }, + { + "text": "Rome, Italy .", + "length": 13 + }, + { + "text": "Boston, USA .", + "length": 13 + }, + { + "text": "Chicago, USA .", + "length": 14 + }, + { + "text": "Orlando, USA .", + "length": 14 + }, + { + "text": "New York, USA .", + "length": 15 + }, + 
{ + "text": "Montreal, Canada .", + "length": 18 + }, + { + "text": "Barcelona, Spain .", + "length": 18 + }, + { + "text": "Washington, DC, USA .", + "length": 21 + }, + { + "text": "Pictured: Boston, USA .", + "length": 23 + }, + { + "text": "Kuala Lumpur, Malaysia .", + "length": 24 + }, + { + "text": "You're not the only one.", + "length": 24 + }, + { + "text": "Struggling heading back to work this week?", + "length": 42 + }, + { + "text": "Among the top flight and holiday searches on ba.", + "length": 48 + }, + { + "text": "Pictured: New York City, a top holiday package search .", + "length": 55 + }, + { + "text": "As employees headed back to work, they were quickly dreaming of their next holiday.", + "length": 83 + }, + { + "text": "Views to the British Airways website jumped 68 per cent and reached their peak at lunchtime.", + "length": 92 + }, + { + "text": "Deals to European destinations like Barcelona and Rome (pictured) were also of particular interest .", + "length": 100 + }, + { + "text": "Following the long Christmas break, searches for flights and holiday deals on British Airways soared as people returned to work.", + "length": 128 + }, + { + "text": "The top searches were to popular destinations in the US, Canada and Asia, with some prices as low as £479 for a trip to Montreal.", + "length": 130 + }, + { + "text": "' European destinations were also in more demand this year with searches significantly up for Amsterdam, Berlin, Malta and France.", + "length": 130 + }, + { + "text": "On January 5, deemed 'Miserable Monday,' it seems that many newly-back-to-work employees were already dreaming of their next holiday.", + "length": 133 + }, + { + "text": "'By offering to hold flights with a deposit of just £5 a person, we can give customers time to confirm arrangements with family and friends, making it even easier to book with confidence.", + "length": 188 + }, + { + "text": "'We know lots of people are mulling over places to fly and go on holiday to and 
have looked at lots of fantasy flights and holidays before they finally commit to a favourite destination and secure their holiday break away.", + "length": 222 + }, + { + "text": "Claire Bentley, managing director of British Airways Holidays, said: 'Almost everyone suffers a 'Miserable Monday' being back at work for the first time after being off during Christmas and New Year so inevitably thoughts turn to summer holidays.", + "length": 246 + }, + { + "text": "com were Boston and Washington, DC (pictured) Views on British Airways' website jumped by 68 per cent, compared to the first day back at work last year, and reached their peak at 12:30pm - during lunchtime - as customers jumped on the internet to scour airline schedules.", + "length": 271 + }, + { + "text": "'And with our sale in full fight, we've also got loads of great offers for holiday-makers with prices starting from as little as £129 per person for a two-night break in Rome or Barcelona and a week's holiday in Barbados or Orlando in a three star hotel is available from just £499 per person.", + "length": 295 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4238985478878021 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:38.959218368Z", + "first_section_created": "2025-12-23T09:34:38.959598283Z", + "last_section_published": "2025-12-23T09:34:38.959746289Z", + "all_results_received": "2025-12-23T09:34:39.02709861Z", + "output_generated": "2025-12-23T09:34:39.027280118Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:38.959598283Z", + "publish_time": "2025-12-23T09:34:38.959746289Z", + "first_worker_start": "2025-12-23T09:34:38.960367814Z", + "last_worker_end": "2025-12-23T09:34:39.026106Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + 
"worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:38.960290611Z", + "start_time": "2025-12-23T09:34:38.960367814Z", + "end_time": "2025-12-23T09:34:38.960436117Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:38.960545Z", + "start_time": "2025-12-23T09:34:38.960695Z", + "end_time": "2025-12-23T09:34:39.026106Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:38.960316912Z", + "start_time": "2025-12-23T09:34:38.960397615Z", + "end_time": "2025-12-23T09:34:38.960475519Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:38.960286511Z", + "start_time": "2025-12-23T09:34:38.960370014Z", + "end_time": "2025-12-23T09:34:38.960396915Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2424, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/0047c355574eccc64f176a1bc3325bc1d689ab3d.json b/data/output/0047c355574eccc64f176a1bc3325bc1d689ab3d.json new file mode 100644 index 0000000..4caea33 --- /dev/null +++ b/data/output/0047c355574eccc64f176a1bc3325bc1d689ab3d.json @@ -0,0 +1,310 @@ +{ + "file_name": "0047c355574eccc64f176a1bc3325bc1d689ab3d.txt", + "total_words": 764, + "top_n_words": [ + { + "word": "the", + "count": 52 + }, + { + "word": "and", + "count": 23 + }, + { + "word": "a", + "count": 22 + }, + { + "word": "of", + "count": 21 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "island", + "count": 15 + }, + { + "word": "as", + "count": 12 + }, + { + "word": "is", + "count": 12 + }, + { + "word": "his", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "Area: 98 sq km .", + "length": 16 + }, + { + "text": "Population: 267 .", + "length": 17 + }, + { + "text": "Language: English .", + "length": 19 + }, + { + "text": "Religion: Christian .", + "length": 21 + }, + { + "text": "3km) and has an area of 37.", + "length": 27 + }, + { + "text": "Monetary unit: British Pound .", + "length": 30 + }, + { + "text": "Capital: Edinburgh of the Seven Seas .", + "length": 38 + }, + { + "text": "Economy: Subsistence farming, fishing, stamps and coins .", + "length": 57 + }, + { + "text": "Status: British overseas territory, dependency of St Helena .", + "length": 61 + }, + { + "text": "He took up his role as ‘voice of the people’ seven years ago.", + "length": 65 + }, + { + "text": "The New Year’s Honours list is packed with celebrities, top dignitaries and household names.", + "length": 94 + }, + { + "text": "' He also represented the island at state events in London and at conferences around the world.", + "length": 95 + 
}, + { + "text": "Mr Bates, a former journalist, first heard of the island while working as a press officer in 1995.", + "length": 98 + }, + { + "text": "Mr Bates, a former journalist, first heard of the island while working as a press officer in 1995 .", + "length": 99 + }, + { + "text": "Tristan da Cunha - known by locals as Tristan - has a north to south length of just seven miles (11.", + "length": 100 + }, + { + "text": "It was discovered in 1506 but remained uninhabited until it was used by US whalers in the late 1700s.", + "length": 101 + }, + { + "text": "'The requests were as varied as supplying dog whistles for shepherds, souvenir badges, silk ties and police badges.", + "length": 115 + }, + { + "text": "'I often took things like bus stops signs, wrapped in brown paper, down to the Post Office to send over to the island.", + "length": 118 + }, + { + "text": "'I've met everyone in the Cabinet including the Prime Minister and most of the other Government ministers over the years.", + "length": 121 + }, + { + "text": "'I'm supposed to be retired and when I started I asked my wife if I could do it because I was meant to be a house husband.", + "length": 122 + }, + { + "text": "Chris Bates, who has been awarded an MBE for his services to the world’s most remote inhabited island, Tristan da Cunha .", + "length": 124 + }, + { + "text": "'I have travelled back and forwards to London three times a week to various meetings and it's effectively been a full-time job.", + "length": 127 + }, + { + "text": "Tristan da Cunha is the name of the main island in a remote group of volcanic islands with the same name in the South Atlantic Ocean.", + "length": 133 + }, + { + "text": "Tristan da Cunha is the name of the main island in a remote group of volcanic islands with the same name in the South Atlantic Ocean .", + "length": 134 + }, + { + "text": "'There is nowhere as remote so it grabs people's attention and if you're dealing with a Government minister it catches their 
interest.", + "length": 134 + }, + { + "text": "But one you might not have heard of is Chris Bates, who has been awarded an MBE for his services to the world’s most remote inhabited island.", + "length": 143 + }, + { + "text": "'The great advantage Tristan has got is that it is so different to anywhere else on the planet and has a unique philosophy and way of life,' he said.", + "length": 149 + }, + { + "text": "At one time Tristan was on the main trading route between Europe and the Indian Ocean, but the small community living there is now extremely isolated.", + "length": 150 + }, + { + "text": "8 square miles (98 sq km) The island is part of the British overseas territory which includes Saint Helena and Ascension thousands of miles to its north.", + "length": 153 + }, + { + "text": "The 66-year-old has been recognised for his work as the UK representative of Tristan da Cunha, a tiny island in the South Atlantic with a population of 267.", + "length": 156 + }, + { + "text": "The 66-year-old has been recognised for his work as the UK representative of Tristan da Cunha, a tiny island in the South Atlantic with a population of 267 .", + "length": 157 + }, + { + "text": "Lying midway between Africa and South America, the island is a British overseas territory and can only be reached by a week-long boat journey from Cape Town.", + "length": 157 + }, + { + "text": "He carried out his duties from his home in Birmingham, often arranging for items such as bus stop signs and gravestones to be delivered to the remote outpost.", + "length": 158 + }, + { + "text": "Lying midway between Africa and South America, the island is a British overseas territory and can only be reached by a week-long boat journey from Cape Town .", + "length": 158 + }, + { + "text": "He carried out his duties from his home in Birmingham, often arranging for items such as bus stop signs and gravestones to be delivered to the remote outpost .", + "length": 159 + }, + { + "text": "The British navy 
stationed a garrison there during Napoleon's exile on St Helena and, when it was withdrawn, three men stayed behind and became the founders of the present settlement.", + "length": 183 + }, + { + "text": "It is the most remote inhabited island in the world - lying 1,243 miles (2,000km) from the nearest inhabited land Saint Helena and 1,491 miles (2,400km) from the nearest continental land South Africa.", + "length": 200 + }, + { + "text": "Time zone: GMT (same as UK) Mr Bates, who lives with his wife Julie, 53, stepped down from the role this week, but the island’s administrator Alex Mitham said he leaves Tristan da Cunha ‘in a stronger position on the world stage’.", + "length": 237 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.38816505670547485 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:39.46050412Z", + "first_section_created": "2025-12-23T09:34:39.462089485Z", + "last_section_published": "2025-12-23T09:34:39.462324794Z", + "all_results_received": "2025-12-23T09:34:39.528498668Z", + "output_generated": "2025-12-23T09:34:39.528704476Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:39.462089485Z", + "publish_time": "2025-12-23T09:34:39.462324794Z", + "first_worker_start": "2025-12-23T09:34:39.462808414Z", + "last_worker_end": "2025-12-23T09:34:39.527664Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:39.462794313Z", + "start_time": "2025-12-23T09:34:39.462844815Z", + "end_time": "2025-12-23T09:34:39.462941419Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:39.463045Z", + "start_time": "2025-12-23T09:34:39.46319Z", + "end_time": 
"2025-12-23T09:34:39.527664Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:39.462793013Z", + "start_time": "2025-12-23T09:34:39.462877316Z", + "end_time": "2025-12-23T09:34:39.46297842Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:39.462760912Z", + "start_time": "2025-12-23T09:34:39.462808414Z", + "end_time": "2025-12-23T09:34:39.462855215Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4260, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/0047cc2b8885f1f53cfab54d50fcf70998fe9cbc.json b/data/output/0047cc2b8885f1f53cfab54d50fcf70998fe9cbc.json new file mode 100644 index 0000000..558663c --- /dev/null +++ 
b/data/output/0047cc2b8885f1f53cfab54d50fcf70998fe9cbc.json @@ -0,0 +1,250 @@ +{ + "file_name": "0047cc2b8885f1f53cfab54d50fcf70998fe9cbc.txt", + "total_words": 428, + "top_n_words": [ + { + "word": "to", + "count": 19 + }, + { + "word": "the", + "count": 14 + }, + { + "word": "united", + "count": 13 + }, + { + "word": "herrera", + "count": 10 + }, + { + "word": "s", + "count": 9 + }, + { + "word": "a", + "count": 7 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "for", + "count": 6 + }, + { + "word": "of", + "count": 6 + }, + { + "word": "gaal", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "October.", + "length": 8 + }, + { + "text": "7million deal.", + "length": 14 + }, + { + "text": "VIDEO Van Gaal set for German shopping spree .", + "length": 46 + }, + { + "text": "VIDEO Ander Herrera set to sign for Manchester United .", + "length": 55 + }, + { + "text": "A dip in form followed but he recovered to help propel Bilbao to fourth place in La Liga.", + "length": 89 + }, + { + "text": "Fresh bid: United have made an improved offer for Southampton and England defender Luke Shaw .", + "length": 94 + }, + { + "text": "Race against time: Herrera's buyout clause at Athletic Bilbao was set to increase on June 30 .", + "length": 94 + }, + { + "text": "Ander Herrera has become Louis van Gaal’s first signing as Manchester United manager in a £28.", + "length": 97 + }, + { + "text": "Alexander Buttner, meanwhile, will join Dynamo Moscow once the details of his fee have been agreed.", + "length": 99 + }, + { + "text": "Despite shelving the signing last August, Moyes had promised Herrera he would monitor his progress.", + "length": 99 + }, + { + "text": "Forward planning: Incoming United boss Louis van Gaal is aiming to strengthen his squad this summer .", + "length": 101 + }, + { + "text": "Wanted man: Manchester United have made a second move to sign Athletic Bilbao midfielder Ander Herrera .", + "length": 104 + }, + { + "text": "At that time they did 
not believe it was worth paying the best part of £5m more to make the deal happen.", + "length": 105 + }, + { + "text": "Bright future: Herrera (right), pictured in action against Barcelona, becomes Louis van Gaal's first signing .", + "length": 110 + }, + { + "text": "Herrera, an intelligent and stylish midfielder, was the subject of a £24m offer from David Moyes’s United last summer.", + "length": 121 + }, + { + "text": "Van Gaal is already starting to put his mark on the squad, with Antonio Valencia and Patrice Evra both signing contract extensions.", + "length": 131 + }, + { + "text": "United have improved their original offer for Southampton and England left back Luke Shaw with a bid understood to be around £30m.", + "length": 131 + }, + { + "text": "United are due to tour the United States later this month before opening their Premier League campaign against Swansea on August 16.", + "length": 132 + }, + { + "text": "Herrera, part of Spain’s Olympic squad at London 2012, was also a team-mate of United’s Juan Mata and David de Gea with Spain Under 21s.", + "length": 140 + }, + { + "text": "Herrera has been watched by United for more than three years, with Sir Alex Ferguson’s brother Martin among the first to bring him to the club’s attention.", + "length": 159 + }, + { + "text": "United moved to pay the buy-out clause fee for the 24-year-old Athletic Bilbao midfielder on Tuesday and he is travelling to London before signing a four-year contract.", + "length": 168 + }, + { + "text": "They know the buy-out figure is due to rise to £32m from July 1 and Ed Woodward, the club’s executive vice-chairman, did not want another saga to drag on this summer.", + "length": 169 + }, + { + "text": "United denied problems trying to sign Herrera last year — namely when three lawyers turned up at La Liga headquarters in an attempt to push through the transfer without knowledge of either club.", + "length": 196 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 
0.47376927733421326 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:39.963135128Z", + "first_section_created": "2025-12-23T09:34:39.963517443Z", + "last_section_published": "2025-12-23T09:34:39.963716751Z", + "all_results_received": "2025-12-23T09:34:40.026142073Z", + "output_generated": "2025-12-23T09:34:40.026287979Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:39.963517443Z", + "publish_time": "2025-12-23T09:34:39.963716751Z", + "first_worker_start": "2025-12-23T09:34:39.964165169Z", + "last_worker_end": "2025-12-23T09:34:40.024705Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:39.964145468Z", + "start_time": "2025-12-23T09:34:39.964207871Z", + "end_time": "2025-12-23T09:34:39.964265273Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:39.964439Z", + "start_time": "2025-12-23T09:34:39.964579Z", + "end_time": "2025-12-23T09:34:40.024705Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:39.964130968Z", + "start_time": "2025-12-23T09:34:39.96419487Z", + "end_time": "2025-12-23T09:34:39.964254673Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:39.964118267Z", + "start_time": "2025-12-23T09:34:39.964165169Z", + "end_time": "2025-12-23T09:34:39.96418877Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + 
"max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2482, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/00481bb44df714ac3bbe91c3fcca21b4c8b70c42.json b/data/output/00481bb44df714ac3bbe91c3fcca21b4c8b70c42.json new file mode 100644 index 0000000..01f94fc --- /dev/null +++ b/data/output/00481bb44df714ac3bbe91c3fcca21b4c8b70c42.json @@ -0,0 +1,568 @@ +{ + "file_name": "00481bb44df714ac3bbe91c3fcca21b4c8b70c42.txt", + "total_words": 1038, + "top_n_words": [ + { + "word": "the", + "count": 49 + }, + { + "word": "of", + "count": 30 + }, + { + "word": "and", + "count": 28 + }, + { + "word": "in", + "count": 28 + }, + { + "word": "to", + "count": 27 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "amazon", + "count": 15 + }, + { + "word": "its", + "count": 12 + }, + { + "word": "s", + "count": 11 + }, + { + "word": "by", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "In .", + "length": 4 + }, + { + "text": "An .", + "length": 4 + }, + { + "text": "By .", + 
"length": 4 + }, + { + "text": "5-10.", + "length": 5 + }, + { + "text": "just £1.", + "length": 9 + }, + { + "text": "recession.", + "length": 10 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "5billion).", + "length": 10 + }, + { + "text": "‘Amazon .", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Blockbuster .", + "length": 13 + }, + { + "text": "1billion (£5.", + "length": 14 + }, + { + "text": "6billion (£9.", + "length": 14 + }, + { + "text": "administrators.", + "length": 15 + }, + { + "text": "3billion (£13.", + "length": 15 + }, + { + "text": "Waterstone’s .", + "length": 16 + }, + { + "text": "brokers Morningstar.", + "length": 20 + }, + { + "text": "December 2008: MFI, .", + "length": 21 + }, + { + "text": "December 2012: Comet .", + "length": 22 + }, + { + "text": "October 2012: JJB Sports .", + "length": 26 + }, + { + "text": "35billion in the UK – a .", + "length": 27 + }, + { + "text": "05:11 EST, 30 January 2013 .", + "length": 28 + }, + { + "text": "05:38 EST, 30 January 2013 .", + "length": 28 + }, + { + "text": "employees losing their jobs .", + "length": 29 + }, + { + "text": "Hugo Gye and Peter Campbell .", + "length": 29 + }, + { + "text": "Critics say this enables the .", + "length": 30 + }, + { + "text": "retailers in the years to come.", + "length": 31 + }, + { + "text": "down over the Christmas period.", + "length": 31 + }, + { + "text": "avoiding millions of pounds in taxes.", + "length": 37 + }, + { + "text": "The company has been under attack for .", + "length": 39 + }, + { + "text": "And industry experts warn more retail .", + "length": 39 + }, + { + "text": "8billion) in the fourth quarter of 2012.", + "length": 40 + }, + { + "text": "8million despite raking in sales of £3.", + "length": 40 + }, + { + "text": "traditional photographyJanuary 2013: HMV, .", + "length": 43 + }, + { + "text": "company to slash prices and undercut rivals.", + "length": 44 + }, + { + "text": 
"Online: Internet retailer Amazon has made £13.", + "length": 47 + }, + { + "text": "failures could see one in five shops boarded up.", + "length": 48 + }, + { + "text": "the number of home buyersJanuary 2013: Jessops .", + "length": 48 + }, + { + "text": "The world's biggest online retailer took in $21.", + "length": 48 + }, + { + "text": "The company came under fire for not honouring gift .", + "length": 52 + }, + { + "text": "vouchers which they had been selling all over Christmas.", + "length": 56 + }, + { + "text": "founder Tim Waterstone previously said Amazon had a ‘rude, .", + "length": 62 + }, + { + "text": "figure the firm tried to keep secret until it was exposed by MPs.", + "length": 65 + }, + { + "text": "contemptuous, arrogant and subversive’ attitude to competitors.", + "length": 65 + }, + { + "text": "For the current quarter, Amazon expects revenue of between $15-16.", + "length": 66 + }, + { + "text": "by Sports Direct - leading to the death of the JJB brand and 550 .", + "length": 66 + }, + { + "text": "struggling with online competition as customers turned away from .", + "length": 66 + }, + { + "text": "was closed by administrator PwC earlier this month after years of .", + "length": 67 + }, + { + "text": "is uniquely positioned to be a disruptive force to the traditional .", + "length": 68 + }, + { + "text": "‘Amazon has played a prominent role in the structural shift away .", + "length": 68 + }, + { + "text": "UK's announcement that it has gone into administration makes it the .", + "length": 69 + }, + { + "text": "from bricks-and-mortar retail and it may lay waste to several other .", + "length": 69 + }, + { + "text": "2011, the most recent year where figures are available, Amazon paid .", + "length": 69 + }, + { + "text": "and forcing the taxpayer to pick up a £50million tab related to its .", + "length": 70 + }, + { + "text": "business at the start of the downturn, as retail sales began to fall .", + "length": 70 + }, + { + "text": "retail 
channel for years to come,’ said analyst RJ Hottovy from US .", + "length": 70 + }, + { + "text": "closed all but 20 of its stores, which were taken over and re-branded .", + "length": 71 + }, + { + "text": "most visible legacy of the financial crisis and subsequent double-dip .", + "length": 71 + }, + { + "text": "latest in a long line of high-profile high street firms to fail - the .", + "length": 71 + }, + { + "text": "the furniture retailer, was one of the first major firms to go out of .", + "length": 71 + }, + { + "text": "which has 239 shops and 4,500 staff, announced that it was calling in .", + "length": 71 + }, + { + "text": "shuttered its 800 stores, bringing home to many the scale of the UK's .", + "length": 71 + }, + { + "text": "shut down just before Christmas, leaving nearly 7,000 staff out of work .", + "length": 73 + }, + { + "text": "estimated 17,500 high street jobs are at risk after 1,400 stores closed .", + "length": 73 + }, + { + "text": "bankruptcy, which was blamed on soaring energy prices and a reduction in .", + "length": 74 + }, + { + "text": "following a sharp rise in unemploymentWoolworthsJanuary 2009: Woolworths .", + "length": 74 + }, + { + "text": "The firm said that international sales, including the UK, soared 20 per cent to $9.", + "length": 83 + }, + { + "text": "Hit: Amazon's strong revenues are boosted by sales of its Kindle Fire tablet computer .", + "length": 87 + }, + { + "text": "Amazon was also dubbed ‘immoral’ for avoiding tax by funnelling revenue to Luxembourg.", + "length": 90 + }, + { + "text": "'We're now seeing the transition we've been expecting,' Amazon founder and CEO Jeff Bezos said.", + "length": 95 + }, + { + "text": "Endangered: More than 4,000 jobs are at risk after video rental service Blockbuster collapsed .", + "length": 95 + }, + { + "text": "Boom: Amazon's Christmas sales are up 22 per cent; pictured is its distribution centre in Fife .", + "length": 96 + }, + { + "text": "5billion in worldwide sales in 
just three months - with one tenth of that coming from UK shoppers .", + "length": 99 + }, + { + "text": "A further 4,190 jobs were endangered when video rental service Blockbuster collapsed earlier this month.", + "length": 104 + }, + { + "text": "Total sales were up by 22 per cent, driven above all by the popularity of its tablet computer, the Kindle Fire.", + "length": 111 + }, + { + "text": "And John Lewis boss Andy Street has called for a ‘level playing field’ to stop it ‘out-trading’ rivals.", + "length": 111 + }, + { + "text": "5billion) over the festive period as customers continued to desert bricks-and-mortar stores in favour of internet shopping.", + "length": 123 + }, + { + "text": "Sales of cheap CDs and DVDs helped hasten the decline of HMV, which is currently in administration with some 4,500 jobs at risk.", + "length": 128 + }, + { + "text": "Household names including HMV, Jessops, Blockbuster and Comet have all collapsed into administration during the past two months.", + "length": 128 + }, + { + "text": "‘Without the cost burden of physical retail stores, Amazon can price below traditional rivals and drive recurring traffic online.", + "length": 131 + }, + { + "text": "High Street stores may be in serious trouble, but their web rival Amazon is booming, as its Christmas sale figures revealed yesterday.", + "length": 134 + }, + { + "text": "'After five years, electronic books is a multibillion dollar category for us and growing fast - up approximately 70 per cent last year.", + "length": 135 + }, + { + "text": "’ Since launching, Amazon has expanded from selling books to include DVDs, CDs, household appliances, garden tools and even car parts.", + "length": 136 + }, + { + "text": "' The firm's revenues missed Wall Street's expectations, but investors still sent its stock up more than 10 per cent in after-hours trading.", + "length": 140 + }, + { + "text": "Amazon's massive Christmas revenues will increase pressure on the web giant to pay more tax in 
the UK, an issue which has seen it under fire recently.", + "length": 150 + }, + { + "text": "The firm boasted that it now offers a selection of 23 million TV shows, songs, magazines, books, audiobooks and apps - up from 19million at the end of 2011.", + "length": 156 + }, + { + "text": "However, net income fell by 45 per cent to $97million (£62million) due to greater investment in Amazon's distribution network and its Kindle range of devices.", + "length": 159 + }, + { + "text": "widespread customer angerBordersDecember 2009: Borders was another entertainment behemoth to go under as sale of CDs and DVDs were squeezed by digital downloads and online retailers .", + "length": 183 + }, + { + "text": "The group was the latest in a long line of entertainment and electronics stores which have collapsed since the financial crash in 2009, including Woolworths – once the UK’s biggest seller of DVDs.", + "length": 200 + }, + { + "text": "In trouble: HMV, which is currently in administration with some 4,500 jobs at risk, was the latest in a long line of entertainment and electronics stores which have collapsed since the financial crash in 2009 .", + "length": 210 + }, + { + "text": "January 2013: Blockbuster followed HMV into administration the next day, and immediately announced plans to close a quarter of its branches as it saw its DVD rental business collapse under pressure from online downloads .", + "length": 221 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7369696199893951 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:40.464488483Z", + "first_section_created": "2025-12-23T09:34:40.464887199Z", + "last_section_published": "2025-12-23T09:34:40.465211612Z", + "all_results_received": "2025-12-23T09:34:40.580061552Z", + "output_generated": "2025-12-23T09:34:40.580296462Z", + "total_processing_time_ms": 115, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 114, + "aggregation_wait_time_ms": 0, 
+ "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:40.464887199Z", + "publish_time": "2025-12-23T09:34:40.465118508Z", + "first_worker_start": "2025-12-23T09:34:40.465917941Z", + "last_worker_end": "2025-12-23T09:34:40.579164Z", + "total_journey_time_ms": 114, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:40.465950842Z", + "start_time": "2025-12-23T09:34:40.466008744Z", + "end_time": "2025-12-23T09:34:40.466099348Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:40.466463Z", + "start_time": "2025-12-23T09:34:40.466629Z", + "end_time": "2025-12-23T09:34:40.579164Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 112 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:40.466009344Z", + "start_time": "2025-12-23T09:34:40.466075347Z", + "end_time": "2025-12-23T09:34:40.466194352Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:40.465859638Z", + "start_time": "2025-12-23T09:34:40.465917941Z", + "end_time": "2025-12-23T09:34:40.465957242Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:40.46515411Z", + "publish_time": "2025-12-23T09:34:40.465211612Z", + "first_worker_start": "2025-12-23T09:34:40.466060346Z", + "last_worker_end": "2025-12-23T09:34:40.540123Z", + "total_journey_time_ms": 74, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:40.466029345Z", + "start_time": "2025-12-23T09:34:40.466060346Z", + "end_time": "2025-12-23T09:34:40.466093348Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:40.466303Z", + "start_time": 
"2025-12-23T09:34:40.466453Z", + "end_time": "2025-12-23T09:34:40.540123Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 73 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:40.466044446Z", + "start_time": "2025-12-23T09:34:40.466077547Z", + "end_time": "2025-12-23T09:34:40.466115949Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:40.465989644Z", + "start_time": "2025-12-23T09:34:40.466084747Z", + "end_time": "2025-12-23T09:34:40.466098248Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 185, + "min_processing_ms": 73, + "max_processing_ms": 112, + "avg_processing_ms": 92, + "median_processing_ms": 112, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3179, + "slowest_section_id": 0, + "slowest_section_time_ms": 114 + } +} diff --git a/data/output/00482026594f2d676865b52694eeae5d08537e8b.json b/data/output/00482026594f2d676865b52694eeae5d08537e8b.json new file mode 
100644 index 0000000..1de94e3 --- /dev/null +++ b/data/output/00482026594f2d676865b52694eeae5d08537e8b.json @@ -0,0 +1,218 @@ +{ + "file_name": "00482026594f2d676865b52694eeae5d08537e8b.txt", + "total_words": 339, + "top_n_words": [ + { + "word": "the", + "count": 32 + }, + { + "word": "s", + "count": 11 + }, + { + "word": "toby", + "count": 11 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "carter", + "count": 10 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "of", + "count": 7 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "as", + "count": 5 + }, + { + "word": "crouch", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "It's a heartwarming moment!", + "length": 27 + }, + { + "text": "The couple soon had a baby, Carter, and as the boy grew up he and Toby became inseparable.", + "length": 90 + }, + { + "text": "In the footage, the pair can be heard watching popular kids' TV show Daniel Tiger's Neighborhood.", + "length": 97 + }, + { + "text": "It seems Toby and Carter are as indiscriminate about their choice of friends as the show's characters.", + "length": 102 + }, + { + "text": "Since Mrs Crouch, of Indiana, uploaded the moving footage on YouTube it has been viewed almost 100,000 times.", + "length": 109 + }, + { + "text": "Adorable: The heartwarming video shows Toby the rescue dog protectively cuddling his sick best friend Carter .", + "length": 111 + }, + { + "text": "Underneath her post she writes: 'Mom walked in the room and saw Toby the rescue and Carter the toddler cuddling.", + "length": 112 + }, + { + "text": "' Toby was adopted by Mrs Crouch and her partner Jake - after his previous owner abandoned him outside a local bank.", + "length": 116 + }, + { + "text": "Loving: Carter and Toby are inseparable, according to the toddler's mother, Devin, who documents the pair's close bond in an online blog .", + "length": 138 + }, + { + "text": "As the aptly worded lyrics to a song in the cartoon ring out -  'friends help 
each other, yes they do' - it seems loving pooch Toby has been taking notes.", + "length": 155 + }, + { + "text": "This video captures the heartwarming moment Toby the rescue dog protectively cuddles his sick best friend Carter as the adorable pair lie on the sofa watching cartoons.", + "length": 168 + }, + { + "text": "Carter's mother documents the pair's close bond in a series of online blog posts, which see Toby and Carter asleep in bed together, playing in the garden and even in the bath.", + "length": 175 + }, + { + "text": "The show sees Daniel the tiger invite a new generation of preschoolers into the Neighborhood of Make-Believe every day - sharing adventures with O the Owl and Katerina Kittycat.", + "length": 177 + }, + { + "text": "When Carter's mother, Devin Crouch, turns the corner while filming the touching scene, Toby's paw is seen wrapped firmly around the poorly toddler in an obvious display of compassion.", + "length": 183 + }, + { + "text": "Protective: When Carter's mother, Devin Crouch, turns the corner while filming the touching scene, Toby's paw is seen wrapped firmly around the poorly toddler in an obvious display of compassion .", + "length": 197 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.3593265116214752 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:40.96613665Z", + "first_section_created": "2025-12-23T09:34:40.966409161Z", + "last_section_published": "2025-12-23T09:34:40.966565568Z", + "all_results_received": "2025-12-23T09:34:41.03222452Z", + "output_generated": "2025-12-23T09:34:41.032356326Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:40.966409161Z", + "publish_time": "2025-12-23T09:34:40.966565568Z", + "first_worker_start": "2025-12-23T09:34:40.967035787Z", + "last_worker_end": 
"2025-12-23T09:34:41.031254Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:40.967049687Z", + "start_time": "2025-12-23T09:34:40.967098389Z", + "end_time": "2025-12-23T09:34:40.967132391Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:40.967295Z", + "start_time": "2025-12-23T09:34:40.967406Z", + "end_time": "2025-12-23T09:34:41.031254Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:40.967071688Z", + "start_time": "2025-12-23T09:34:40.96712399Z", + "end_time": "2025-12-23T09:34:40.967184793Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:40.966981184Z", + "start_time": "2025-12-23T09:34:40.967035787Z", + "end_time": "2025-12-23T09:34:40.967050687Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1970, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/004835d31bb5f4d4d7a0e92dc4cd94fe9f2b5a89.json b/data/output/004835d31bb5f4d4d7a0e92dc4cd94fe9f2b5a89.json new file mode 100644 index 0000000..67d888d --- /dev/null +++ b/data/output/004835d31bb5f4d4d7a0e92dc4cd94fe9f2b5a89.json @@ -0,0 +1,668 @@ +{ + "file_name": "004835d31bb5f4d4d7a0e92dc4cd94fe9f2b5a89.txt", + "total_words": 1481, + "top_n_words": [ + { + "word": "the", + "count": 123 + }, + { + "word": "of", + "count": 51 + }, + { + "word": "to", + "count": 49 + }, + { + "word": "and", + "count": 45 + }, + { + "word": "a", + "count": 38 + }, + { + "word": "church", + "count": 30 + }, + { + "word": "as", + "count": 21 + }, + { + "word": "at", + "count": 21 + }, + { + "word": "with", + "count": 20 + }, + { + "word": "family", + "count": 19 + } + ], + "sorted_sentences": [ + { + "text": "But .", + "length": 5 + }, + { + "text": "She .", + "length": 5 + }, + { + "text": "She .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "Miss .", + "length": 6 + }, + { + "text": "Lanka.", + "length": 6 + }, + { + "text": "‘As .", + "length": 7 + }, + { + "text": "During .", + "length": 8 + }, + { + "text": "’ The .", + "length": 9 + }, + { + "text": "Despite .", + "length": 9 + }, + { + "text": "As they .", + "length": 9 + }, + { + "text": "‘After .", + "length": 10 + }, + { + "text": "broadcast.", + "length": 10 + }, + { + "text": "It was a .", + "length": 10 + }, + { + "text": "The answer?", + "length": 11 + }, + { + "text": "’ There .", + "length": 11 + }, + { + "text": "of Cornwall.", + "length": 12 + }, + { + "text": "recent years.", + "length": 13 + }, + { + "text": "She spoke at .", + "length": 14 + }, + { + 
"text": "Zara Phillips, .", + "length": 16 + }, + { + "text": "Last month he stood .", + "length": 21 + }, + { + "text": "afforded that honour.", + "length": 21 + }, + { + "text": "He spent the summer .", + "length": 21 + }, + { + "text": "SCROLL DOWN FOR VIDEO .", + "length": 23 + }, + { + "text": "in a long brown fur coat.", + "length": 25 + }, + { + "text": "to in her speech yesterday.", + "length": 27 + }, + { + "text": "While George was missing from .", + "length": 31 + }, + { + "text": "She said: ‘Here at home my own .", + "length": 34 + }, + { + "text": "Queen’s Christmas address at 3pm.", + "length": 35 + }, + { + "text": "Footage of the event was also shown.", + "length": 36 + }, + { + "text": "cream overcoat with a red tartan shawl.", + "length": 39 + }, + { + "text": "At home playing with the wrapping paper.", + "length": 40 + }, + { + "text": "family is a little larger this Christmas.", + "length": 41 + }, + { + "text": "future with ‘renewed happiness and hope’.", + "length": 45 + }, + { + "text": "Kate was also in a different outfit, choosing a .", + "length": 49 + }, + { + "text": "happy occasion, bringing together four generations.", + "length": 51 + }, + { + "text": "begun to take on more official duties on her behalf.", + "length": 52 + }, + { + "text": "half-mile walk from Sandringham House to the church.", + "length": 52 + }, + { + "text": "She is seen here greeting the Duchess of Cambridge .", + "length": 52 + }, + { + "text": "recuperating after exploratory surgery on his abdomen.", + "length": 54 + }, + { + "text": "the christening, we gathered for the traditional photograph.", + "length": 60 + }, + { + "text": "The Queen and the Duchess of Cambridge share a warm greeting .", + "length": 62 + }, + { + "text": "‘For the new parents, life will never be quite the same again.", + "length": 64 + }, + { + "text": "in for his mother for the first time at the bi-annual heads of .", + "length": 64 + }, + { + "text": "chance to contemplate the future 
with renewed happiness and hope.", + "length": 65 + }, + { + "text": "royal family posing for official photographs to mark the occasion.", + "length": 66 + }, + { + "text": "with her grandmother, the Queen, in her Bentley to the church door.", + "length": 67 + }, + { + "text": "Duchess of Cambridge can be heard coaxing her baby son to look at .", + "length": 67 + }, + { + "text": "trimmed version of the beard he grew during his expedition to the .", + "length": 67 + }, + { + "text": "so many of you will know, the arrival of a baby gives everyone the .", + "length": 68 + }, + { + "text": "day but was more interested in the wrapping paper than the presents.", + "length": 68 + }, + { + "text": "allowing unseen footage from his christening to be included in the .", + "length": 68 + }, + { + "text": "traditional Christmas Day church service on the Sandringham estate .", + "length": 68 + }, + { + "text": "arrived at church they were accompanied by Prince Harry, sporting a .", + "length": 69 + }, + { + "text": "was also a rare reference to her eldest son, Prince Charles, who has .", + "length": 70 + }, + { + "text": "length about how the birth of a baby helps people to think about the .", + "length": 70 + }, + { + "text": "Normally it is the Countess of Wessex, Prince Edward’s wife, who is .", + "length": 71 + }, + { + "text": "later spoke about George’s christening, as footage was shown of the .", + "length": 71 + }, + { + "text": "duke and duchess were among around 30 members of the royal family who .", + "length": 71 + }, + { + "text": "for a private family service earlier in the day she wrapped up warmly .", + "length": 71 + }, + { + "text": "celebrity portrait photographer Jason Bell, saying ‘Good boy, George.", + "length": 71 + }, + { + "text": "smiled as she accepted flowers and a piece of origami from the public .", + "length": 71 + }, + { + "text": "adding: ‘My son Charles summed this up at the recent meeting in Sri .", + "length": 71 + }, + { + "text": "his 
recent ill health, Prince Philip, 92, led his family on the brisk .", + "length": 71 + }, + { + "text": "who is just weeks away from giving birth to her first child, travelled .", + "length": 72 + }, + { + "text": "government meeting, which the Queen took the unusual step of referring .", + "length": 72 + }, + { + "text": "Princess Anne also wore a fur coat to the traditional Christmas service.", + "length": 72 + }, + { + "text": "made it clear how much joy her first great-grandson had given her, even .", + "length": 73 + }, + { + "text": "the speech, recorded at Buckingham Palace earlier this month, the Queen .", + "length": 73 + }, + { + "text": "Antarctic earlier this month, as well as Prince Charles and the Duchess .", + "length": 73 + }, + { + "text": "the morning’s events, he was the star of the show when it came to the .", + "length": 73 + }, + { + "text": "Howard, from Great Yarmouth, said: ‘She told me he was having a lovely .", + "length": 74 + }, + { + "text": "celebrated Christmas on the Queen’s Norfolk estate – far more than in .", + "length": 75 + }, + { + "text": "spoke about how the ‘Commonwealth can offer us a fresh view of life’, .", + "length": 75 + }, + { + "text": "Princess Anne and Vice Admiral Sir Timothy Laurence walk to church this morning .", + "length": 81 + }, + { + "text": "Father and son: The Duke of York and the Duke of Edinburgh arrive for the service .", + "length": 83 + }, + { + "text": "Vice Admiral Sir Timothy Laurence (left) and Autumn Philips (right) walk to church .", + "length": 84 + }, + { + "text": "Zara’s husband, Mike Tindall, walked alongside his brother-in-law, Peter Phillips.", + "length": 84 + }, + { + "text": "Popular: Around 3,000 well-wishers gathered outside the church to watch them arrive .", + "length": 85 + }, + { + "text": "’ The Duke and Duchess of Cambridge hold hands as they lead the Royal Family to the .", + "length": 87 + }, + { + "text": "Prince Philip and Prince Andrew arrive at church on the 
Sandringham Estate this morning .", + "length": 89 + }, + { + "text": "Family gathering: Lady Louise outside the church with her parents and cousin Prince William .", + "length": 93 + }, + { + "text": "Kate, who wore a tartan Alexander McQueen coat, made the comment to wellwisher Cicely Howard, 75.", + "length": 97 + }, + { + "text": "‘He spoke of the Commonwealth’s “family ties” that are a source of encouragement  to many.", + "length": 99 + }, + { + "text": "The Queen and Zara Tindall both wore animal-inspired hats to the traditional Christmas Day service .", + "length": 100 + }, + { + "text": "William and Harry and Prince Charles join other members of the Royal Family on their walk to church .", + "length": 101 + }, + { + "text": "Head of the family: Queen Elizabeth wore an orange coat with a black fur hat as she left the service .", + "length": 102 + }, + { + "text": "Appearance: There was a rare public outing for the Earl of Wessex's 10-year-old daughter Lady Louise .", + "length": 102 + }, + { + "text": "The look of love: Kate and William beamed at each other as they made their way to church hand in hand .", + "length": 103 + }, + { + "text": "Action man: Prince Harry was still sporting the beard he grew during his recent trip to the South Pole .", + "length": 104 + }, + { + "text": "Well-wishers: One cheeky member of the crowd shouted 'where's the baby' as Kate arrived for the service .", + "length": 105 + }, + { + "text": "The Duke and Duchess of Cambridge link arms as they follow other members of the Royal Family into church .", + "length": 106 + }, + { + "text": "The Royal Family look in festive spirits as they walk to church for the traditional Christmas Day service .", + "length": 107 + }, + { + "text": "The Queen, the Duchess of Cambridge and Princess Anne are seen arriving at church on the Sandringham estate .", + "length": 109 + }, + { + "text": "The Queen opted for a burnt orange coat with a black fur hat and matching gloves for the public service, 
at 11am.", + "length": 113 + }, + { + "text": "Princesses Beatrice (far left) and Eugenie (second right) walk to church with other members of the Royal Family .", + "length": 113 + }, + { + "text": "Princess Eugenie leads Princess Beatrice and other members of the Royal Family in the walk to church this morning .", + "length": 115 + }, + { + "text": "’ Brothers: A bearded Prince Harry grinned to the waiting crowds, while Prince William also chatted to onlookers .", + "length": 116 + }, + { + "text": "The Count and Countess of Wessex walk to church for the traditional Christmas service with the Duchess of Cambridge .", + "length": 117 + }, + { + "text": "Princess Anne and The Queen both wore fur to the service, and together with the Duchess of Cambridge seem to share a joke .", + "length": 123 + }, + { + "text": "Rugby player Mike Tindall (far left) appears to smile at the camera as he walks to church with members of the Royal Family .", + "length": 124 + }, + { + "text": "’ The Queen said: ‘As with all who are christened, George was baptised into a joyful faith of Christian duty and service.", + "length": 125 + }, + { + "text": "’ And William told the crowd: ‘We’ve had a good morning with George and I can’t wait until next year when he’s bigger.", + "length": 128 + }, + { + "text": "Gathering: heavily pregnant Zara Phillips looked relaxed and happy in a berry-coloured coat as she stood with her husband Mike Tindall .", + "length": 136 + }, + { + "text": "Meet and greet: Grinning Kate stopped to chat to waiting members of the public as she made her way to church, accepting flowers from well-wishers .", + "length": 147 + }, + { + "text": "The Queen and the Duchess of Cambridge share a warm greeting as they prepare to attend the traditional Christmas Day service on the Sandringham Estate .", + "length": 152 + }, + { + "text": "Seasons greetings: The Queen accessorised her orange coat with fur-trimmed gloves and a hat, while Zara Phillips looked festive in her red coat 
and hat .", + "length": 153 + }, + { + "text": "(Left) A heavily pregnant Zara Tindall arrives at church with her husband Mike and (right) Princes William and Harry share a chat as they walk to church .", + "length": 154 + }, + { + "text": "For the crowds who gathered at Sandringham to see the Royal Family attend church yesterday morning, there was one very important person missing – Prince George.", + "length": 162 + }, + { + "text": "Prince Philip, The Duchess of Cornwall, Prince Charles and the Duke and Duchess of Cambridge arrive at church this morning for a traditional Christmas Day service .", + "length": 164 + }, + { + "text": "Prince Harry, who has just returned from a short trip to Cape Town following his charity trek, shares a conversation with his brother William on the way to church .", + "length": 164 + }, + { + "text": "(Left) Princesses Beatrice (right) and Eugenie (left) arrive at church and (right) a bearded Prince Harry returned from a trip to Cape Town to join the festivities .", + "length": 165 + }, + { + "text": "(Left) The Duke of Edinburgh joined his family on the walk to church and (right) the Duchess of Cornwall wore a navy blue outfit  to attend the traditional service .", + "length": 166 + }, + { + "text": "Holding hands: New parents the Duke and Duchess of Cambridge smiled and held hands as they arrived for the Royal's Christmas church service, but left Prince George at home .", + "length": 173 + }, + { + "text": "Father-to-be Mike Tindall can be seen (far left) walking to church with other members of the Royal Family including the Count and Countess of Wessex and the Duchess of Cambridge .", + "length": 179 + }, + { + "text": "Some shouted ‘where’s the baby’ as his beaming parents, the Duke and Duchess of Cambridge, arrived hand in hand for the traditional Christmas Day service at St Mary Magdalene Church.", + "length": 188 + }, + { + "text": "The Duchess of Cambridge (pictured with the Duke of Cambridge) removed her coat to reveal a 
tartan Alexander McQueen dress: As the Royal Family arrived at church they were greeted by scores of well-wishers .", + "length": 207 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4702935665845871 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:41.467320599Z", + "first_section_created": "2025-12-23T09:34:41.467683514Z", + "last_section_published": "2025-12-23T09:34:41.46809803Z", + "all_results_received": "2025-12-23T09:34:41.571443606Z", + "output_generated": "2025-12-23T09:34:41.571664915Z", + "total_processing_time_ms": 104, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 103, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:41.467683514Z", + "publish_time": "2025-12-23T09:34:41.467952024Z", + "first_worker_start": "2025-12-23T09:34:41.468469545Z", + "last_worker_end": "2025-12-23T09:34:41.551302Z", + "total_journey_time_ms": 83, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:41.468521147Z", + "start_time": "2025-12-23T09:34:41.46858025Z", + "end_time": "2025-12-23T09:34:41.468673054Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:41.468786Z", + "start_time": "2025-12-23T09:34:41.468958Z", + "end_time": "2025-12-23T09:34:41.551302Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 82 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:41.468481146Z", + "start_time": "2025-12-23T09:34:41.468562949Z", + "end_time": "2025-12-23T09:34:41.468658353Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:41.468405443Z", + "start_time": "2025-12-23T09:34:41.468469545Z", + "end_time": "2025-12-23T09:34:41.468512747Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:41.468006327Z", + "publish_time": "2025-12-23T09:34:41.46809803Z", + "first_worker_start": "2025-12-23T09:34:41.46859205Z", + "last_worker_end": "2025-12-23T09:34:41.570607Z", + "total_journey_time_ms": 102, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:41.468555249Z", + "start_time": "2025-12-23T09:34:41.46859525Z", + "end_time": "2025-12-23T09:34:41.468665853Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:41.468776Z", + "start_time": "2025-12-23T09:34:41.468959Z", + "end_time": "2025-12-23T09:34:41.570607Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 101 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:41.468600651Z", + "start_time": "2025-12-23T09:34:41.468660653Z", + "end_time": "2025-12-23T09:34:41.468755357Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:41.468541648Z", + "start_time": "2025-12-23T09:34:41.46859205Z", + "end_time": "2025-12-23T09:34:41.468622452Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 183, + "min_processing_ms": 82, + "max_processing_ms": 101, + "avg_processing_ms": 91, + "median_processing_ms": 101, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4364, + "slowest_section_id": 1, + "slowest_section_time_ms": 102 + } +} diff --git a/data/output/0048594a5077b8de55cbd682cb2b8214192e892d.json b/data/output/0048594a5077b8de55cbd682cb2b8214192e892d.json new file mode 100644 index 0000000..8ce76d9 --- /dev/null +++ b/data/output/0048594a5077b8de55cbd682cb2b8214192e892d.json @@ -0,0 +1,314 @@ +{ + "file_name": "0048594a5077b8de55cbd682cb2b8214192e892d.txt", + "total_words": 708, + "top_n_words": [ + { + "word": "the", + "count": 23 + }, + { + "word": "a", + "count": 21 + }, + { + "word": "i", + "count": 18 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "he", + "count": 14 + }, + { + "word": "his", + "count": 13 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "s", + "count": 11 + }, + { + "word": "schweitzer", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "I'm accepting.", + "length": 14 + }, + { + "text": "Dianne Feinstein.", + "length": 17 + }, + { + "text": "How do I say this ...", + "length": 21 + }, + { + "text": "Again, I couldn't care less.", + "length": 28 + }, + { + "text": "They just have effeminate mannerisms.", + "length": 37 + }, + { + "text": "'But he's not, I think, so I don't know.", + "length": 40 + }, + { + "text": "' She then stepped onto the Senate floor.", + "length": 41 + }, + { + "text": "men in the South, they are a little effeminate.", + "length": 47 + }, + { + "text": "His comments were met with widespread 
disapproval.", + "length": 50 + }, + { + "text": "Intelligence Committee chairwoman to a prostitute.", + "length": 50 + }, + { + "text": "If a Democrat is good with money, you can’t beat ‘em.", + "length": 57 + }, + { + "text": "'And now she says, 'I'm a nun,' when it comes to this spying.", + "length": 61 + }, + { + "text": "I mean, maybe that's the wrong metaphor — but she was all in!", + "length": 63 + }, + { + "text": "' He added: 'Don't hold this against me, but I'm going to blurt it out.", + "length": 71 + }, + { + "text": "'I am deeply sorry and sincerely apologize for my carelessness and disregard.", + "length": 77 + }, + { + "text": "'I am deeply sorry': Schweitzer made the humble apology on his Facebook page .", + "length": 78 + }, + { + "text": "Diane Feinstein has more class in her pinky than Schweitzer has in his 20 gallon hat.", + "length": 85 + }, + { + "text": "Schweitzer made the slur after a similarly ill-advised gaffe in which he compared Sen.", + "length": 86 + }, + { + "text": "' Republican Mike Coffman added: 'What kind of sexist scumbag says something like this?", + "length": 87 + }, + { + "text": "'I cut more taxes than any governor in the history of Montana, invested more new money in education.", + "length": 100 + }, + { + "text": "'She was the woman who was standing under the streetlight with her dress pulled all the way up over her knees,' he said.", + "length": 120 + }, + { + "text": "” But he went on to tell the show's hosts that he would be a successful president, citing his record as governor of Montana.", + "length": 126 + }, + { + "text": "' And yesterday when asked about Schweitzer's comments, Feinstein laughed and said: 'You better keep him away from my husband.", + "length": 126 + }, + { + "text": "' Media fury: His comments were met with widespread disapproval with Republican National Committee Reince Priebus tweeting this .", + "length": 129 + }, + { + "text": "'I recently made a number of stupid and insensitive remarks to a 
reporter from the National Journal,' he wrote on his Facebook page.", + "length": 132 + }, + { + "text": "' Schweitzer has hinted at a desire to compete in the Democrat presidential primaries in 2016, even if Hillary Clinton runs as well.", + "length": 132 + }, + { + "text": "She then made a hand gesture indicating that Schweitzer’s remarks were not mentally balanced, adding: 'That’s all I’m going to say.", + "length": 137 + }, + { + "text": "But yesterday the Democrat issued a humble apology for his 'stupid and insensitive remarks' after they were published in the National Journal.", + "length": 142 + }, + { + "text": "' Cantor, who represents central Virginia and is outgoing House Majority Leader, has been married to his wife for 25 years and has three children.", + "length": 146 + }, + { + "text": "'As governor, I spent eight years — every single year I was governor we had the largest budget surplus in the history of Montana,' Schweitzer said.", + "length": 149 + }, + { + "text": "Feinstein, the Democratic senator from California who chairs the Intelligence Committee, to a prostitute whle discussing her position on intelligence gathering.", + "length": 160 + }, + { + "text": "Republican National Committee Reince Priebus tweeted: 'If a Republican said *anything* about \"gaydar\" or a senator being a \"prostitute,\" the media fury would never end.", + "length": 168 + }, + { + "text": "Brian Schweitzer sparked outrage over comments he made this week in a newspaper interview in which he speculated over Cantor's sexuality and railed against fellow Democrat Sen.", + "length": 176 + }, + { + "text": "Asked on MSNBC’s “Morning Joe” program in January about whether he would run for president in 2016, he said, “I haven’t decided that,” before adding, “It would ruin my life.", + "length": 187 + }, + { + "text": "' Schweitzer shocked colleagues and adversaries alike when he shared his perceptions of Republican Mr Cantor's sexuality following his surprising Virginia GOP loss 
last week to an unknown candidate.", + "length": 198 + }, + { + "text": "'If you were just a regular person, you turned on the TV, and you saw Eric Cantor talking, I would say — and I'm fine with gay people, that's all right — but my gaydar is 60-70 percent,' he said.", + "length": 199 + }, + { + "text": "Ill-advised: In the interview, Schweitzer said House Majority Leader Eric Cantor (left) sets off his 'gaydar' after likening Intelligence Committee chairperson Dianne Feinstein (right) to a prostitute .", + "length": 202 + }, + { + "text": "Stupid and insensitive: Democrat Brian Schweitzer issued a humble apology for his 'stupid and insensitive remarks' The former governor of Montana has been forced into a grovelling apology after saying House Majority Leader Eric Cantor sets off his 'gaydar' and likening the U.", + "length": 276 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.920434296131134 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:41.968880463Z", + "first_section_created": "2025-12-23T09:34:41.969244277Z", + "last_section_published": "2025-12-23T09:34:41.969512188Z", + "all_results_received": "2025-12-23T09:34:42.031635598Z", + "output_generated": "2025-12-23T09:34:42.031780104Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:41.969244277Z", + "publish_time": "2025-12-23T09:34:41.969512188Z", + "first_worker_start": "2025-12-23T09:34:41.970017809Z", + "last_worker_end": "2025-12-23T09:34:42.029012Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:41.970020509Z", + "start_time": "2025-12-23T09:34:41.970093612Z", + "end_time": "2025-12-23T09:34:41.970177915Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { 
+ "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:41.970231Z", + "start_time": "2025-12-23T09:34:41.970386Z", + "end_time": "2025-12-23T09:34:42.029012Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:41.969995108Z", + "start_time": "2025-12-23T09:34:41.970066311Z", + "end_time": "2025-12-23T09:34:41.970168315Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:41.969943806Z", + "start_time": "2025-12-23T09:34:41.970017809Z", + "end_time": "2025-12-23T09:34:41.97004791Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4209, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git 
a/data/output/004971d00a8b29b67597b2baad67e813dbbf4b66.json b/data/output/004971d00a8b29b67597b2baad67e813dbbf4b66.json new file mode 100644 index 0000000..f5a074b --- /dev/null +++ b/data/output/004971d00a8b29b67597b2baad67e813dbbf4b66.json @@ -0,0 +1,464 @@ +{ + "file_name": "004971d00a8b29b67597b2baad67e813dbbf4b66.txt", + "total_words": 1205, + "top_n_words": [ + { + "word": "the", + "count": 56 + }, + { + "word": "a", + "count": 40 + }, + { + "word": "to", + "count": 32 + }, + { + "word": "and", + "count": 30 + }, + { + "word": "in", + "count": 29 + }, + { + "word": "s", + "count": 27 + }, + { + "word": "brown", + "count": 21 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "for", + "count": 16 + }, + { + "word": "he", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "Nope.", + "length": 5 + }, + { + "text": "It's fun.", + "length": 9 + }, + { + "text": "But so what?", + "length": 12 + }, + { + "text": "See: Romney, Mitt.", + "length": 18 + }, + { + "text": "See: Cain, Herman.", + "length": 18 + }, + { + "text": "What's not to love?", + "length": 19 + }, + { + "text": "Well, not completely.", + "length": 21 + }, + { + "text": "You should call him and ask.", + "length": 28 + }, + { + "text": "But what if Brown takes off?", + "length": 28 + }, + { + "text": "Steve King and New York Rep.", + "length": 28 + }, + { + "text": "Rand Paul and New Jersey Gov.", + "length": 29 + }, + { + "text": "It's the same reason Iowa Rep.", + "length": 30 + }, + { + "text": "The real question is: Why not?", + "length": 30 + }, + { + "text": "\"Funnel cake and free name ID.", + "length": 30 + }, + { + "text": "I could definitely anticipate this.", + "length": 35 + }, + { + "text": "Brown knows exactly what he's doing.", + "length": 36 + }, + { + "text": "On the Republican side, Kentucky Sen.", + "length": 37 + }, + { + "text": "For the moment, it would seem the latter.", + "length": 41 + }, + { + "text": "Which category does Scott Brown fall under?", + "length": 
43 + }, + { + "text": "2016 Watch: Scott Brown makes a stop in Iowa .", + "length": 46 + }, + { + "text": "Another Cruz trip to Iowa stokes 2016 speculation .", + "length": 51 + }, + { + "text": "that would make it difficult for Brown to raise money.", + "length": 54 + }, + { + "text": "Scott Brown stirs speculation with New Hampshire visit .", + "length": 56 + }, + { + "text": "Will either of them be taking the oath of office one day?", + "length": 57 + }, + { + "text": "Even some of his former advisers aren't sure what he's up to.", + "length": 61 + }, + { + "text": "He was enthusiastic about getting back in the national conversation.", + "length": 68 + }, + { + "text": "\"The fact that he is in Iowa doesn't surprise me,\" the Republican told CNN.", + "length": 75 + }, + { + "text": "\"How many stories got posted about Ed Markey's legislative agenda yesterday?", + "length": 76 + }, + { + "text": "Chris Christie are driving national discussions about ideology and governance.", + "length": 78 + }, + { + "text": "Peter King (no relation; not even close) are \"refusing to rule out\" a 2016 bid.", + "length": 79 + }, + { + "text": "\" asked Will Ritter, a Boston-based GOP operative and former Mitt Romney adviser.", + "length": 81 + }, + { + "text": "\"In '16, it's not a bad idea to put your name out there and see where it takes you.", + "length": 83 + }, + { + "text": "What if he rattles off a few good lines, has his moment in the sun, and then fades?", + "length": 83 + }, + { + "text": "\" \"He's acting on his own, as far as I can tell,\" another onetime adviser said in an e-mail.", + "length": 92 + }, + { + "text": "For someone with no real perch other than a paid gig at Fox News, it actually makes a lot of sense.", + "length": 99 + }, + { + "text": "\"Running\" for president means doing a lot of interviews and delivering some well-timed lines in debates.", + "length": 104 + }, + { + "text": "Neither Republican did a single thing to advance the news cycle other than 
board an airplane to Des Moines.", + "length": 107 + }, + { + "text": "\" For obsessive political watchers, Brown's shamelessness about the whole enterprise is kind of refreshing.", + "length": 107 + }, + { + "text": "But even if they don't, the two conservatives proved how keeping one's name in the 2016 conversation is its own reward.", + "length": 119 + }, + { + "text": "Another Republican who has spoken with Brown recently isn't surprised by the sudden interest in the presidential spotlight.", + "length": 123 + }, + { + "text": "Asked by text message if he's serious about a presidential bid, one Republican who talks to Brown often responded: \"Who knows.", + "length": 126 + }, + { + "text": "Brown, this person said, is a relentlessly enthusiastic guy who still takes great pride in capturing Ted Kennedy's old Senate seat.", + "length": 131 + }, + { + "text": "Senator Brown's a skilled retail politician and this gives him a platform to talk about a brand of Republicanism we could use more of.", + "length": 134 + }, + { + "text": "Christie raised money for Brown several times during his Senate tenure, and, according to one Christie insider, the two are \"very friendly.", + "length": 139 + }, + { + "text": "After all, Brown has already floated bids for New Hampshire senator and Massachusetts governor, and he doesn't seem likely to pursue either.", + "length": 140 + }, + { + "text": "Both men seem likely to run for the Republican nomination, and both will be returning to Iowa over and over and over again in the coming years.", + "length": 143 + }, + { + "text": "Still, there's a difference between running for president and \"running\" for president, even though it's sometimes difficult to tell the difference.", + "length": 147 + }, + { + "text": "He will have lost absolutely nothing -- but gained a spot on some vice presidential short lists along with a hike in his post-campaign speaking fees.", + "length": 149 + }, + { + "text": "\"The focus on the 2016 presidential 
contest is completely ridiculous, and everybody knows it,\" wrote David Weigel of Slate after witnessing the cattle call.", + "length": 156 + }, + { + "text": "What if he gains a toehold in New Hampshire, rises to high single digits in the polls sometime in 2015, and gets invited to some Republican primary debates?", + "length": 156 + }, + { + "text": "The truth is that in today's media environment, there's almost no downside for a long-shot \"candidate\" like Brown to tell people he's mulling a White House run.", + "length": 160 + }, + { + "text": "In their speeches, Cruz and Santorum issued a series of anti-Obama bromides and boosted their profiles with the grassroots activists who attended the Iowa summit.", + "length": 162 + }, + { + "text": "Why in the world would Scott Brown, a former half-term Senator from Mitt Romney's Massachusetts, put himself in the mix for the 2016 Republican presidential nomination?", + "length": 168 + }, + { + "text": "\" Then there's the fact that Brown, who supports some abortion rights, isn't exactly a hardliner on social issues that matter to so many Republican caucus-goers in Iowa.", + "length": 169 + }, + { + "text": "Before his Iowa trip, Brown met privately with Christie at the Republican National Committee's summer conference in Boston, a meeting first reported by the New York Times.", + "length": 171 + }, + { + "text": "\"I do admire the audacity to just go to the state fair and tweet about it,\" said Jeff Smith, a professor at the New School and regular contributor to the Washington Twitter conversation.", + "length": 186 + }, + { + "text": "Eric Fehrnstrom, the media strategist who crafted Brown's truck-driving, regular-guy image during his stunning 2010 Senate upset, is not currently advising him, Republican sources told CNN.", + "length": 189 + }, + { + "text": "Running for president requires hard work, an ungodly amount of fundraising effort, a professional team of advisers, polling, a paid media strategy, a voter 
contact operation and ballot access.", + "length": 192 + }, + { + "text": "Just by going to the Iowa State Fair, a must-do for any ambitious pol, Brown will be rewarded with the only currency that matters in modern campaign politics (other than hard fundraising dollars): Buzz.", + "length": 202 + }, + { + "text": "In our atomized media ecosystem, there's certainly a market for niche political coverage, in the same way there's a market for micro-reporting on the status of Robert Griffin III's return from knee surgery.", + "length": 206 + }, + { + "text": "Brown was thumped by Elizabeth Warren in his 2012 re-election bid, and he became something of a punch line earlier this year after he unleashed a volley of questionable late night tweets at some online critics.", + "length": 210 + }, + { + "text": "Yet there they were, trailed at every turn by reporters from the Washington Post, Des Moines Register, Associated Press, New York Times, Dallas Morning News, Wall Street Journal, NBC News, ABC News and Fox News.", + "length": 211 + }, + { + "text": "Just look at this month's Family Leadership Summit, a gathering of social conservatives in Iowa that drew potential 2016 presidential contenders Ted Cruz and Rick Santorum to the first-in-the-nation caucus state.", + "length": 212 + }, + { + "text": "More importantly, the presidential cycle is starting earlier than it ever has, with advisers to likely candidates working behind the scenes to assemble campaign infrastructure and peddle dirt on their potential opponents.", + "length": 221 + }, + { + "text": "When Brown told the Des Moines Register over the weekend that he was heading to the Iowa State Fair \"to determine whether there's an interest in my brand of leadership and Republicanism,\" the news was met with some amusement by political insiders.", + "length": 247 + }, + { + "text": "But with so many news platforms to fill -- on television, on the web, on the radio -- a presidential trial balloon or a trip to Iowa is 
almost guaranteed to get you at least a crumb of media exposure, a boost in stature, and maybe even a few campaign contributions down the road.", + "length": 279 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5201773941516876 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:42.470298621Z", + "first_section_created": "2025-12-23T09:34:42.471951788Z", + "last_section_published": "2025-12-23T09:34:42.472380705Z", + "all_results_received": "2025-12-23T09:34:42.574355025Z", + "output_generated": "2025-12-23T09:34:42.574586234Z", + "total_processing_time_ms": 104, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 101, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:42.471951788Z", + "publish_time": "2025-12-23T09:34:42.4722498Z", + "first_worker_start": "2025-12-23T09:34:42.47274422Z", + "last_worker_end": "2025-12-23T09:34:42.54395Z", + "total_journey_time_ms": 71, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:42.472730819Z", + "start_time": "2025-12-23T09:34:42.472806722Z", + "end_time": "2025-12-23T09:34:42.472911226Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:42.472969Z", + "start_time": "2025-12-23T09:34:42.473117Z", + "end_time": "2025-12-23T09:34:42.54395Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 70 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:42.472682417Z", + "start_time": "2025-12-23T09:34:42.472772721Z", + "end_time": "2025-12-23T09:34:42.472904426Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:42.472664716Z", + "start_time": "2025-12-23T09:34:42.47274422Z", + "end_time": "2025-12-23T09:34:42.472781621Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:42.472303002Z", + "publish_time": "2025-12-23T09:34:42.472380705Z", + "first_worker_start": "2025-12-23T09:34:42.472826623Z", + "last_worker_end": "2025-12-23T09:34:42.573233Z", + "total_journey_time_ms": 100, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:42.472846224Z", + "start_time": "2025-12-23T09:34:42.472887925Z", + "end_time": "2025-12-23T09:34:42.472926427Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:42.473074Z", + "start_time": "2025-12-23T09:34:42.473185Z", + "end_time": "2025-12-23T09:34:42.573233Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 100 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:42.472800222Z", + "start_time": "2025-12-23T09:34:42.472836323Z", + "end_time": "2025-12-23T09:34:42.472881625Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:42.472794122Z", + "start_time": "2025-12-23T09:34:42.472826623Z", + "end_time": "2025-12-23T09:34:42.472847724Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 170, + "min_processing_ms": 70, + "max_processing_ms": 100, + "avg_processing_ms": 85, + "median_processing_ms": 100, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3488, + "slowest_section_id": 1, + "slowest_section_time_ms": 100 + } +} diff --git a/data/output/004a3692a2fe1fb76513a3459b8251c44e0ce4a0.json b/data/output/004a3692a2fe1fb76513a3459b8251c44e0ce4a0.json new file mode 100644 index 0000000..7ee91b3 --- /dev/null +++ b/data/output/004a3692a2fe1fb76513a3459b8251c44e0ce4a0.json @@ -0,0 +1,246 @@ +{ + "file_name": "004a3692a2fe1fb76513a3459b8251c44e0ce4a0.txt", + "total_words": 499, + "top_n_words": [ + { + "word": "in", + "count": 19 + }, + { + "word": "s", + "count": 18 + }, + { + "word": "the", + "count": 14 + }, + { + "word": "h", + "count": 13 + }, + { + "word": "m", + "count": 13 + }, + { + "word": "sydney", + "count": 13 + }, + { + "word": "for", + "count": 12 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "store", + "count": 10 + }, + { + "word": "and", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Marielle Simon for Daily Mail Australia .", + "length": 41 + }, + { + "text": "It has to change; as retailers we have to do better.", + "length": 52 + }, + { + "text": "Australian supermodel, Miranda Kerr launch H\u0026M's 2014 spring collection .", + "length": 73 + }, + { + "text": "The H\u0026M store in Melbourne's historic GPO building apparently saw more than 1.", + "length": 78 + }, + { + "text": "H\u0026M manager for Australian Hans Andersson said men's fashion has been a hit in 
Melbourne's store .", + "length": 98 + }, + { + "text": "7 million people in less than 20 weeks, and now sees customers queuing outside the store made for 1200-persons.", + "length": 112 + }, + { + "text": "Pitt Street's Glasshouse centre in Sydney's CBD will be making room for a 5000 square metre, three-level H\u0026M boutique .", + "length": 119 + }, + { + "text": "Sydney's CBD store will follow the same concept of LA's marketing and stock due to warmer weather and influence of water .", + "length": 123 + }, + { + "text": "Now, Pitt Street's Glasshouse centre in Sydney's CBD will be making room for a 5000 square metre, three-level H\u0026M boutique.", + "length": 123 + }, + { + "text": "'We deliver the latest fashion but from what I understand the Australian market has been receiving trends one season too late.", + "length": 126 + }, + { + "text": "Mr Andersson said Sydney is now the teams focus, which will be drawing on LA marketing and stock due to warmer weather and influence of water.", + "length": 142 + }, + { + "text": "In April this year, H\u0026M opened its Australian flagship store in Melbourne, which recorded as one of H\u0026M's most successful opening days for a new store .", + "length": 152 + }, + { + "text": "Sydney's H\u0026M boutique will offer their full collection which includes homewares, sportswear, an accessory range and all clothing for women, men and children.", + "length": 158 + }, + { + "text": "Great news for Sydney's fashionistas - H\u0026M has announced they have found the perfect Sydney location for their second store in Australia, which is set to open mid-2015.", + "length": 169 + }, + { + "text": "The multi-billion dollar fashion chain said both Australian stores will receive new stock everyday, with plans to extend trading hours in attempt to cash in on passer-bys.", + "length": 171 + }, + { + "text": "'We will have Alexander Wang in Macquarie and Melbourne, so both stores will have them in store in November, which is something 
that we don't normally do in suburban stores,' Mr Andersson said.", + "length": 193 + }, + { + "text": "The western suburbs of Sydney is also set to welcome H\u0026M's much anticipated collaborative collection with American fashion-designer Alexander Wang, to North Ryde's AMP Macquarie Centre on October 16.", + "length": 199 + }, + { + "text": "Especially in a city such as Sydney, which has world-class restaurants, yet the shopping is not up to the same standard,' H\u0026M manager for Australian Hans Andersson recently told Sydney Morning Herald.", + "length": 200 + }, + { + "text": "But Sydney shoppers are not the only ones that should be excited, with rumours revealing H\u0026M's thoughts to collaborate with a local Australian designer, who is yet to be established, before Sydney's launch.", + "length": 206 + }, + { + "text": "The Swedish retail-clothing company exists in 53 countries across the world, and in April this year opened its Australian flagship store in Melbourne, which recorded one of H\u0026M's most successful opening days for a new store, Sydney Morning Herald reported in April.", + "length": 267 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.6200096607208252 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:42.973141944Z", + "first_section_created": "2025-12-23T09:34:42.973461557Z", + "last_section_published": "2025-12-23T09:34:42.973653965Z", + "all_results_received": "2025-12-23T09:34:43.033978804Z", + "output_generated": "2025-12-23T09:34:43.03413201Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:42.973461557Z", + "publish_time": "2025-12-23T09:34:42.973653965Z", + "first_worker_start": "2025-12-23T09:34:42.974331192Z", + "last_worker_end": "2025-12-23T09:34:43.031734Z", + "total_journey_time_ms": 58, + 
"worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:42.974291091Z", + "start_time": "2025-12-23T09:34:42.974358293Z", + "end_time": "2025-12-23T09:34:42.974411295Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:42.974545Z", + "start_time": "2025-12-23T09:34:42.97468Z", + "end_time": "2025-12-23T09:34:43.031734Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:42.97427619Z", + "start_time": "2025-12-23T09:34:42.974331192Z", + "end_time": "2025-12-23T09:34:42.974394495Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:42.974294991Z", + "start_time": "2025-12-23T09:34:42.974360593Z", + "end_time": "2025-12-23T09:34:42.974386094Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2849, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/004a525f45c51c298817088fd2d1562fe51cb03a.json b/data/output/004a525f45c51c298817088fd2d1562fe51cb03a.json new file mode 100644 index 0000000..a3877b5 --- /dev/null +++ b/data/output/004a525f45c51c298817088fd2d1562fe51cb03a.json @@ -0,0 +1,322 @@ +{ + "file_name": "004a525f45c51c298817088fd2d1562fe51cb03a.txt", + "total_words": 525, + "top_n_words": [ + { + "word": "the", + "count": 24 + }, + { + "word": "to", + "count": 23 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "said", + "count": 14 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "troops", + "count": 11 + }, + { + "word": "nato", + "count": 8 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "are", + "count": 7 + }, + { + "word": "a", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "It .", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "operations this spring .", + "length": 24 + }, + { + "text": "12:46 EST, 4 September 2013 .", + "length": 29 + }, + { + "text": "01:27 EST, 3 September 2013 .", + "length": 29 + }, + { + "text": "Amanda Williams and Gerri Peev .", + "length": 32 + }, + { + "text": "commitment to the country, he said.", + "length": 35 + }, + { + "text": "Dunford insisted that troops had been .", + "length": 39 + }, + { + "text": "US General Joseph Dunford said 'time's .", + "length": 40 + }, + { + "text": "US General Joseph Dunford said 'time's .", + "length": 40 + }, + { + "text": "their goals' to crush the government's will.", + "length": 44 + }, + { + "text": "Some 1,792 Afghan police officers 
have been .", + "length": 45 + }, + { + "text": "troops will stay on in Afghanistan after 2014.", + "length": 46 + }, + { + "text": "It will reassure both Afghans and their neighbours of U.", + "length": 56 + }, + { + "text": "‘I’m not assuming that those casualties are sustainable.", + "length": 60 + }, + { + "text": "American forces account for around two-thirds of the number.", + "length": 60 + }, + { + "text": "going to tell' whether it was premature for Nato to end combat .", + "length": 64 + }, + { + "text": "going to tell' whether it was premature for Nato to end combat .", + "length": 64 + }, + { + "text": "killed since March – twice as many as the same period last year, .", + "length": 68 + }, + { + "text": "month said the United States and its allies should confirm  how many .", + "length": 71 + }, + { + "text": "comes after former NATO supreme commander Admiral James Stavridis last .", + "length": 72 + }, + { + "text": "successful in 'defying the Taliban' and 'preventing them from achieving .", + "length": 73 + }, + { + "text": "He said a Bilateral Security Agreement (BSA) now needs to be signed within months.", + "length": 82 + }, + { + "text": "In August President Obama promised this would signal 'the end of our war in Afghanistan.", + "length": 88 + }, + { + "text": "' Currently there are approximately 100,000 troops in Afghanistan in the NATO-led coalition.", + "length": 92 + }, + { + "text": "But after a 'difficult' negotiation period with Kabul, officials have since spoken of removing all troops.", + "length": 106 + }, + { + "text": "In reference to the casualty rates, he said: ‘I view it as serious, and so do all the commanders,’ he said.", + "length": 111 + }, + { + "text": "He said it should be clarified in order to combat Taliban propaganda that foreign troops are 'abandoning the country'.", + "length": 118 + }, + { + "text": "operations this spring and said the follow-on mission focusing on 'capability development' is likely to last several 
years.", + "length": 123 + }, + { + "text": "By the end of 2014, Afghans are expected to take full responsibility for their security, and all combat Nato troops will leave.", + "length": 127 + }, + { + "text": "The White House has in the past suggested a small force of between 8,000 to 12,000 troops will remain in the country after 2014 in a support role.", + "length": 146 + }, + { + "text": "Stavridis said in a commentary in Foreign Policy: 'Instead of waiting for months, we should move now to decide and publicly reveal the commitment.", + "length": 146 + }, + { + "text": "' He said 'close air support' may still be needed to assist Afghan security forces, which are only just beginning to develop their own aviation capacity.", + "length": 153 + }, + { + "text": "Western troops are likely to remain in Afghanistan beyond 2015 in order to help develop the Afghan air force and intelligence capability, a Nato commander has said.", + "length": 164 + }, + { + "text": "according to the Afghan interior ministry, leading to claims that the country is in no fit state to be left without Western combat troops as they prepare to exit this spring.", + "length": 174 + }, + { + "text": "’ Gen Dunford said the 350,000 local security forces struggled with both intelligence-gathering and logistics and needed ‘three to five years’ before they no longer needed help from foreign troops.", + "length": 203 + }, + { + "text": "By the end of 2014, Afghans are expected to take full responsibility for their security, and all combat Nato troops will leave (Stock picture) General Joseph Dunford, the top US and Nato commander in the country, said that casualty rates were 'serious' in an interview .", + "length": 270 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.46680301427841187 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:43.474415212Z", + "first_section_created": "2025-12-23T09:34:43.474781527Z", + "last_section_published": 
"2025-12-23T09:34:43.474988136Z", + "all_results_received": "2025-12-23T09:34:43.540908301Z", + "output_generated": "2025-12-23T09:34:43.541068407Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:43.474781527Z", + "publish_time": "2025-12-23T09:34:43.474988136Z", + "first_worker_start": "2025-12-23T09:34:43.475469855Z", + "last_worker_end": "2025-12-23T09:34:43.540037Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:43.475412153Z", + "start_time": "2025-12-23T09:34:43.475469855Z", + "end_time": "2025-12-23T09:34:43.475530957Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:43.475694Z", + "start_time": "2025-12-23T09:34:43.475849Z", + "end_time": "2025-12-23T09:34:43.540037Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:43.475418953Z", + "start_time": "2025-12-23T09:34:43.475497356Z", + "end_time": "2025-12-23T09:34:43.475571459Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:43.475496156Z", + "start_time": "2025-12-23T09:34:43.475574559Z", + "end_time": "2025-12-23T09:34:43.475609761Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + 
"total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3167, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/004ab01063247aa6c4c28dedf82af6e228c558b3.json b/data/output/004ab01063247aa6c4c28dedf82af6e228c558b3.json new file mode 100644 index 0000000..ee0729a --- /dev/null +++ b/data/output/004ab01063247aa6c4c28dedf82af6e228c558b3.json @@ -0,0 +1,230 @@ +{ + "file_name": "004ab01063247aa6c4c28dedf82af6e228c558b3.txt", + "total_words": 336, + "top_n_words": [ + { + "word": "the", + "count": 20 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "girl", + "count": 7 + }, + { + "word": "saadiya", + "count": 7 + }, + { + "word": "stones", + "count": 7 + }, + { + "word": "be", + "count": 6 + }, + { + "word": "by", + "count": 6 + }, + { + "word": "in", + "count": 6 + }, + { + "word": "that", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "'Others fear it could be the start of a dangerous epidemic.", + "length": 59 + }, + { + "text": "Above, a collection of small, hard stones that Saadiya has 'cried' out .", + "length": 72 + }, + { + "text": "Right, a small box full of stones  that have fallen out of the girl's eyes 
.", + "length": 77 + }, + { + "text": "However, locals have spoken of their fears that the young girl is possessed by magic.", + "length": 85 + }, + { + "text": "They are then naturally pushed to the front of her eyes, before falling down her cheeks.", + "length": 88 + }, + { + "text": "A video of the bizarre phenomenon was posted on YouTube by Yemeni television channel Azal.", + "length": 90 + }, + { + "text": "Strange: A video of the phenomenon was posted on YouTube by Yemeni television channel Azal.", + "length": 91 + }, + { + "text": "'Some say the girl could be gripped by a magic spell, while others say it might be the devil.", + "length": 93 + }, + { + "text": "Saadiya Saleh, who lives in a village in Yemen, produces the small, hard stones beneath her eyelids.", + "length": 100 + }, + { + "text": "Uncomfortable: The young girl, who lives in Yemen, produces the tiny, hard stones beneath her eyelids .", + "length": 103 + }, + { + "text": "Bizarre phenomenon: Saadiya Saleh, 12, has baffled doctors after crying small stones instead of tears .", + "length": 103 + }, + { + "text": "A doctor then holds up a small box full of stones that have fallen out of Saadiya's eyes in just a few hours.", + "length": 109 + }, + { + "text": "'The case has triggered panic in the area where this girl lives,' a presenter can be heard saying in the video.", + "length": 111 + }, + { + "text": "A 12-year-old girl has baffled doctors after crying stones instead of tears - with locals fearing she may be 'possessed'.", + "length": 121 + }, + { + "text": "Medics said they could not give an explanation for the condition - with Saadiya not suffering from any known disease, according to the Daily Mirror.", + "length": 148 + }, + { + "text": "Mystery: Doctors are yet to come up with an explanation for the unusual condition, claiming that Saadiya (left) is not suffering from any known disease.", + "length": 152 + }, + { + "text": "Footage shows Saadiya lying on a hospital bed, surrounded by 
doctors and relatives - one of whom can be seen brushing a stone out of the child's eye with a cloth.", + "length": 162 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5594066381454468 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:43.9761797Z", + "first_section_created": "2025-12-23T09:34:43.97789487Z", + "last_section_published": "2025-12-23T09:34:43.978079077Z", + "all_results_received": "2025-12-23T09:34:44.047544486Z", + "output_generated": "2025-12-23T09:34:44.047681692Z", + "total_processing_time_ms": 71, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:43.97789487Z", + "publish_time": "2025-12-23T09:34:43.978079077Z", + "first_worker_start": "2025-12-23T09:34:43.978698002Z", + "last_worker_end": "2025-12-23T09:34:44.046689Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:43.978708103Z", + "start_time": "2025-12-23T09:34:43.978774405Z", + "end_time": "2025-12-23T09:34:43.978823607Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:43.9789Z", + "start_time": "2025-12-23T09:34:43.979048Z", + "end_time": "2025-12-23T09:34:44.046689Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:43.9786468Z", + "start_time": "2025-12-23T09:34:43.978730504Z", + "end_time": "2025-12-23T09:34:43.978790506Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:43.9786388Z", + "start_time": "2025-12-23T09:34:43.978698002Z", + "end_time": "2025-12-23T09:34:43.978721003Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + 
} + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1914, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/004ad069a0867e8de59440a84c1b510422bea645.json b/data/output/004ad069a0867e8de59440a84c1b510422bea645.json new file mode 100644 index 0000000..d38e704 --- /dev/null +++ b/data/output/004ad069a0867e8de59440a84c1b510422bea645.json @@ -0,0 +1,326 @@ +{ + "file_name": "004ad069a0867e8de59440a84c1b510422bea645.txt", + "total_words": 664, + "top_n_words": [ + { + "word": "the", + "count": 27 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "i", + "count": 15 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "them", + "count": 14 + }, + { + "word": "her", + "count": 13 + }, + { + "word": "lee", + "count": 12 + }, + { + "word": "of", + "count": 11 + } + ], + 
"sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Lizzie Parry .", + "length": 14 + }, + { + "text": "before taking them home.", + "length": 24 + }, + { + "text": "them to a pigeon sanctuary.", + "length": 27 + }, + { + "text": "09:00 EST, 8 January 2014 .", + "length": 27 + }, + { + "text": "08:30 EST, 8 January 2014 .", + "length": 27 + }, + { + "text": "I have seen rats in her basement.", + "length": 33 + }, + { + "text": "have raised them all their lives .", + "length": 34 + }, + { + "text": "She said: 'If they are too injured, I take .", + "length": 44 + }, + { + "text": "I love them and they depend on me because I .", + "length": 45 + }, + { + "text": "’ Lee, of Pimlico, will be sentenced on February 4.", + "length": 53 + }, + { + "text": "‘If they are too injured, I take them to a pigeon sanctuary.", + "length": 62 + }, + { + "text": "But Lee was found to have breached the order on October 14, 2012.", + "length": 65 + }, + { + "text": "‘Then I put them in Gucci bags hanging off my Mamas \u0026 Papas pram.", + "length": 67 + }, + { + "text": "‘I then catch them in a net, which I carry with me everywhere I go.", + "length": 69 + }, + { + "text": "I love them and they depend on me because I have raised them all their lives.", + "length": 77 + }, + { + "text": "’ Lee said she catches the pigeons with a net, which she carries with her, .", + "length": 78 + }, + { + "text": "One neighbour, Austen Iverleigh, complained: ‘She generates squalor on an industrial scale.", + "length": 93 + }, + { + "text": "Lee, who suffers from multiple personality disorder, was deemed unfit to plead to the charges.", + "length": 94 + }, + { + "text": "I take them home and take the string off their feet, rub them in lavender and treat them with homeopathy.", + "length": 105 + }, + { + "text": "Lee, who suffers multiple 
personality disorder, was given the ASBO in December 2011 by Westminster Council .", + "length": 108 + }, + { + "text": "Nadia Lee, 67, spent 25 years fattening pigeons in the exclusive London borough, much to the dismay of locals.", + "length": 110 + }, + { + "text": "‘The problem is the feeding of pigeons on a huge scale, which causes serious disturbance to those around her.", + "length": 111 + }, + { + "text": "’ Privately educated Lee, a former secretary and pilot’s daughter, appeared crestfallen as the ruling was passed.", + "length": 117 + }, + { + "text": "Egyptian-born Lee told the court: ‘You cannot rescue pigeons without putting down seed and getting them all together.", + "length": 119 + }, + { + "text": "‘I’m not satisfied there is any evidence you can’t control this behaviour and the court expects you to control it.", + "length": 120 + }, + { + "text": "Westminster City Council took action in December 2011, banning her from feeding birds in the local area for three years.", + "length": 120 + }, + { + "text": "A jury at Southwark Crown Court were unable to agree on two further alleged breaches in July 2012, which were discharged.", + "length": 121 + }, + { + "text": "When the ASBO was originally granted, Lee said she had turned into a self-styled ‘bin lady’ after her mother’s death two decades ago.", + "length": 139 + }, + { + "text": "‘Whenever I have spoken to her, she has been very polite and I am sure that in any other respect, she is a very nice and reasonable person.", + "length": 141 + }, + { + "text": "A pensioner nicknamed the ‘Pigeon Lady of Pimlico’ has been found guilty of flouting an ASBO banning her from feeding the birds near her home.", + "length": 146 + }, + { + "text": "Residents in Cumberland Street, Pimlico complained their luxury cars were being peppered with bird droppings, and said the food was attracting rats .", + "length": 149 + }, + { + "text": "Wearing a bright blue scarf, black fleece jacket, jeans and trainers, she sat in 
stunned silence before being accompanied out of the court by a friend.", + "length": 151 + }, + { + "text": "Nadia Lee, dubbed the 'Pigeon lady of Pimlico' has been found guilty of flouting an Asbo banning her from feeding the birds in her exclusive neighbourhood .", + "length": 156 + }, + { + "text": "’ But Lee was slapped with the ASBO after neighbours complained their luxury cars were being peppered with droppings and said the feed was encouraging rats.", + "length": 158 + }, + { + "text": "Seven residents and two council officials told how they were unable to enjoy the roof terraces and gardens of their multi-million pound homes because of pigeon dirt.", + "length": 165 + }, + { + "text": "She became renowned for scooping stricken birds up into her Gucci handbag and taking them to her £750,000 Georgian maisonette where they are rubbed in lavender in a ‘homeopathy treatment.", + "length": 190 + }, + { + "text": "’ Addressing Lee, the judge said: ‘Regressing into a different personality is one thing that has rendered you at least temporarily unfit to be tried, but the extent which you can control your behaviour is another issue.", + "length": 223 + }, + { + "text": "Adjourning sentence for reports, Judge Martin Beddoe said: ‘ASBO or no ASBO, it’s going to be a condition of her bail not to discharge any food stuff in any public place, covered by the area as identified in the existing order.", + "length": 231 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6924393177032471 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:44.478866626Z", + "first_section_created": "2025-12-23T09:34:44.479161138Z", + "last_section_published": "2025-12-23T09:34:44.479361346Z", + "all_results_received": "2025-12-23T09:34:44.553633049Z", + "output_generated": "2025-12-23T09:34:44.553813256Z", + "total_processing_time_ms": 74, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 74, + "aggregation_wait_time_ms": 0, + 
"section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:44.479161138Z", + "publish_time": "2025-12-23T09:34:44.479361346Z", + "first_worker_start": "2025-12-23T09:34:44.479910568Z", + "last_worker_end": "2025-12-23T09:34:44.552732Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:44.479926469Z", + "start_time": "2025-12-23T09:34:44.479994871Z", + "end_time": "2025-12-23T09:34:44.480081175Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:44.480105Z", + "start_time": "2025-12-23T09:34:44.480256Z", + "end_time": "2025-12-23T09:34:44.552732Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:44.479849066Z", + "start_time": "2025-12-23T09:34:44.479910568Z", + "end_time": "2025-12-23T09:34:44.479993971Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:44.479901168Z", + "start_time": "2025-12-23T09:34:44.479975271Z", + "end_time": "2025-12-23T09:34:44.480003472Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 72, + "min_processing_ms": 72, + "max_processing_ms": 72, + "avg_processing_ms": 72, + "median_processing_ms": 72, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3878, + "slowest_section_id": 0, + "slowest_section_time_ms": 73 + } +} diff --git a/data/output/004b513bb2f93dca70386dad16c9fb5770ebdc49.json b/data/output/004b513bb2f93dca70386dad16c9fb5770ebdc49.json new file mode 100644 index 0000000..c3efd88 --- /dev/null +++ b/data/output/004b513bb2f93dca70386dad16c9fb5770ebdc49.json @@ -0,0 +1,210 @@ +{ + "file_name": "004b513bb2f93dca70386dad16c9fb5770ebdc49.txt", + "total_words": 279, + "top_n_words": [ + { + "word": "of", + "count": 14 + }, + { + "word": "his", + "count": 10 + }, + { + "word": "a", + "count": 8 + }, + { + "word": "ravi", + "count": 8 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "jail", + "count": 5 + }, + { + "word": "on", + "count": 5 + }, + { + "word": "said", + "count": 5 + }, + { + "word": "the", + "count": 5 + }, + { + "word": "to", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "On Monday, U.", + "length": 13 + }, + { + "text": "His lawyer filed a notice of appeal of his conviction earlier this month.", + "length": 73 + }, + { + "text": "Immigrations and Customs Enforcement said it would not deport Ravi to his native India.", + "length": 87 + }, + { + "text": "Ravi began serving his term on May 31, two days after apologizing in a written statement for spying on Clementi.", + "length": 112 + }, + { + "text": "The judge said he took Ravi's youth and his lack of a criminal record into consideration when handing down his sentence.", + "length": 120 + }, + { + "text": 
"Dharun Ravi, 20, was found guilty in May of invasion of privacy, witness tampering, hindering apprehension and bias intimidation.", + "length": 129 + }, + { + "text": "Ravi was released early after jail officials applied five days of good behavior and five days of work credit to his term, Cicchi said.", + "length": 134 + }, + { + "text": "He left the Middlesex County Jail in North Brunswick, New Jersey, on Tuesday morning, according to Edmond Cicchi, warden of the Middlesex County Office of Adult Corrections.", + "length": 173 + }, + { + "text": "The former Rutgers University student convicted of spying on and intimidating his gay roommate was released from jail Tuesday after serving his sentence, a jail official said.", + "length": 175 + }, + { + "text": "Ravi's former roommate, 18-year-old Tyler Clementi, killed himself by jumping off a New York bridge after learning Ravi had secretly recorded Clementi and his partner with a webcam.", + "length": 181 + }, + { + "text": "The agency is legally prohibited from deporting legal permanent residents unless they have been convicted of crimes such as an aggravated felony, domestic violence or drug or weapons offenses, a spokesman said.", + "length": 210 + }, + { + "text": "While Ravi could have been sentenced to 10 years in prison, New Jersey Superior Judge Glenn Berman instead gave him a 30-day jail sentence, three years of probation and ordered him to complete 300 hours of community service aimed at assisting victims of bias crimes.", + "length": 266 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8024306893348694 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:44.980115693Z", + "first_section_created": "2025-12-23T09:34:44.980447307Z", + "last_section_published": "2025-12-23T09:34:44.980608413Z", + "all_results_received": "2025-12-23T09:34:45.042396411Z", + "output_generated": "2025-12-23T09:34:45.042545217Z", + "total_processing_time_ms": 62, + 
"file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:44.980447307Z", + "publish_time": "2025-12-23T09:34:44.980608413Z", + "first_worker_start": "2025-12-23T09:34:44.981226138Z", + "last_worker_end": "2025-12-23T09:34:45.041517Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:44.981197937Z", + "start_time": "2025-12-23T09:34:44.981257939Z", + "end_time": "2025-12-23T09:34:44.981292741Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:44.98141Z", + "start_time": "2025-12-23T09:34:44.981544Z", + "end_time": "2025-12-23T09:34:45.041517Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:44.981176336Z", + "start_time": "2025-12-23T09:34:44.981226138Z", + "end_time": "2025-12-23T09:34:44.98127134Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:44.981163936Z", + "start_time": "2025-12-23T09:34:44.981231338Z", + "end_time": "2025-12-23T09:34:44.981249939Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { 
+ "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1686, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/004b7045c61fab2a7189f8c09cae11ef1d4c5cfc.json b/data/output/004b7045c61fab2a7189f8c09cae11ef1d4c5cfc.json new file mode 100644 index 0000000..b042473 --- /dev/null +++ b/data/output/004b7045c61fab2a7189f8c09cae11ef1d4c5cfc.json @@ -0,0 +1,420 @@ +{ + "file_name": "004b7045c61fab2a7189f8c09cae11ef1d4c5cfc.txt", + "total_words": 1035, + "top_n_words": [ + { + "word": "and", + "count": 42 + }, + { + "word": "the", + "count": 39 + }, + { + "word": "a", + "count": 23 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "with", + "count": 18 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "s", + "count": 15 + }, + { + "word": "new", + "count": 13 + }, + { + "word": "street", + "count": 13 + }, + { + "word": "to", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "m.", + "length": 2 + }, + { + "text": "Calexico .", + "length": 10 + }, + { + "text": "Milk Truck .", + "length": 12 + }, + { + "text": "Lumpia Shack .", + "length": 14 + }, + { + "text": "Solber Pupusas .", + "length": 16 + }, + { + "text": "Dirty water dogs .", + "length": 18 + }, + { + "text": "Various locations .", + "length": 19 + }, + { + "text": "Various locations .", + "length": 19 + }, + { + "text": "Bolivian Llama Party .", + "length": 22 + }, + { + "text": "Breakfast cart bagels .", + "length": 23 + }, + { + "text": "King of Falafel 
\u0026 Shawarma .", + "length": 28 + }, + { + "text": "Red Hook Lobster Pound food truck .", + "length": 35 + }, + { + "text": "Milk Truck; locations vary; +1 646 233 3838 .", + "length": 45 + }, + { + "text": "Lines form before the tiny street stall as early as 11 a.", + "length": 57 + }, + { + "text": "The most popular item by far is the grilled cheese sandwich.", + "length": 60 + }, + { + "text": "Calexico; Prince Street and Wooster Street; +1 646 590 4172 .", + "length": 61 + }, + { + "text": "each Saturday for lumpia, crispy, Filipino-inspired spring rolls.", + "length": 65 + }, + { + "text": "New Yorkers don't need to go to New England for a good lobster roll.", + "length": 68 + }, + { + "text": "Red Hook Lobster Pound Food Truck; locations vary; +1 718 858 7650 .", + "length": 68 + }, + { + "text": "King of Falafel \u0026 Shawarma; 53rd Street and Park Avenue; +1 718 838 8029 .", + "length": 74 + }, + { + "text": "Lumpia Shack; Smorgasburg at Kent Avenue and Wythe Avenue, Brooklyn; +1 917 475 1621 .", + "length": 86 + }, + { + "text": "Served on banana leaves with a tangy slaw, the Salvadoran tamales are also crowd favorites.", + "length": 91 + }, + { + "text": "Many pre-prepare their bagels for convenience, but most carts will make your bagel to order.", + "length": 92 + }, + { + "text": "It's neither sophisticated nor gourmet, but it's the quintessential New York food experience.", + "length": 93 + }, + { + "text": "Bolivian Llama Party; Smorgasburg at Kent Avenue and Wythe Avenue, Brooklyn; +1 347 395 5481 .", + "length": 94 + }, + { + "text": "Thanks to Big Red, Red Hook Lobster Pound's lobster shack on wheels, they only need to walk to the curb.", + "length": 104 + }, + { + "text": "Solber Pupusas; Brooklyn Flea Market at Lafayette Avenue and Vanderbilt Avenue, Brooklyn; +1 516 965 0214 .", + "length": 107 + }, + { + "text": "street food staples can now be enjoyed on the streets of Brooklyn thanks to this popular Smorgasburg stall.", + "length": 107 + 
}, + { + "text": "Bessie, Milk Truck's sunshine-yellow food truck, is a welcome sight for hungry New Yorkers during lunch hour.", + "length": 109 + }, + { + "text": "The signature platter comes loaded with two pupusas, tangy curtido, pickled jalapenos, tomato sauce and sour cream.", + "length": 115 + }, + { + "text": "There are three variations: the classic, the classic with onion and mustard, and a hearty three-cheese version with apple.", + "length": 122 + }, + { + "text": "Every day, the truck's perpetually cheerful staff hawk classic American comfort foods like mac and cheese and turkey chili.", + "length": 123 + }, + { + "text": "Each roll is made using locally sourced ground pork, roasted duck or truffled adobo mushrooms, hand-rolled and then deep-fried.", + "length": 127 + }, + { + "text": "(CNN)New York may be a paradise of Zagat-rated, Michelin-starred restaurants, but some of its best food can be found on the streets.", + "length": 132 + }, + { + "text": "Hundreds of mobile eateries hawking gourmet global cuisine occupy corners across the city, alongside traditional hotdog vendors and halal carts.", + "length": 144 + }, + { + "text": "Though Lumpia Shack has recently upgraded to its own brick-and-mortar, its original location at Brooklyn's Smorgasburg street food market still remains.", + "length": 152 + }, + { + "text": "Unlike California, New York isn't renowned for its Mexican food, but the city has stepped up its game in recent years, thanks in large part to Calexico.", + "length": 152 + }, + { + "text": "Culinary heavyweights Anthony Bourdain, Marcus Samuelsson and Martha Stewart are all said to be fans of Vendy-winning Solber Pupusas, and it's no wonder.", + "length": 153 + }, + { + "text": "Despite a price tag high that's high for the streets -- $16 per roll at the time of writing -- the truck still sells between 300-400 rolls every two hours.", + "length": 155 + }, + { + "text": "Though saltenas -- crusty, empanada-like pastries filled with meat 
and vegetables --are easily its best-selling item, the chola slider is the real star here.", + "length": 157 + }, + { + "text": "New York boasts many terrific brick-and-mortar bagel shops, but you won't get a cheaper or more authentic breakfast than a bagel and coffee from a street cart.", + "length": 159 + }, + { + "text": "Coffee, usually deli-quality, is served in small blue-and-white Anthora cups that have become as characteristic of New York as yellow cabs and dirty water dogs.", + "length": 160 + }, + { + "text": "Rolls come Maine-style, served cold with mayo, or Connecticut-style, served warm with butter and lemon, each stuffed with a quarter pound of fresh Maine lobster.", + "length": 161 + }, + { + "text": "The modern take on the humble sanduiche de chola comes stuffed with either pork or beef brisket and topped with hibiscus-pickled onions, carrots, kolla cheese and parsley.", + "length": 171 + }, + { + "text": "Every morning, locals file out of the subway and make a beeline for the nearest silver breakfast cart, whose narrow shelves are stocked high with bagels and pastries of every kind.", + "length": 180 + }, + { + "text": "What started out as a lone taco cart in SoHo in 2006 -- one of New York's first -- has since grown into a fleet of carts across the city and a handful of brick-and-mortar locations.", + "length": 181 + }, + { + "text": "Originally a Queens staple, the cart dominated the corner of 30th Street and Broadway in Astoria for almost a decade before it won the Vendy Award for New York's Best Street Food in 2010.", + "length": 187 + }, + { + "text": "Named after the warm, salty water it's soaked in, the hot frank is served in a soft bun (which sops up residual water) and then topped with ketchup, mustard, onions, relish and sauerkraut.", + "length": 188 + }, + { + "text": "Despite not having a regular location -- Bessie's daily whereabouts must be tracked online -- Milk Truck has become a fixture in the New York street food scene thanks to 
its fiercely loyal following.", + "length": 199 + }, + { + "text": "Unlike regular street food, Lumpia Shack's plating is restaurant quality: the lumpia are arranged artfully on a tray, drizzled with homemade sauce and garnished with pea shoots and pickled vegetables.", + "length": 200 + }, + { + "text": "Halal carts slinging styrofoam plates piled high with falafel, shawarma and rice are ubiquitous in New York, but you'll recognize \"the King\" by the seemingly endless line crowding the sidewalk beside it.", + "length": 203 + }, + { + "text": "Sometimes it feels like almost every other Manhattan street corner is dressed with the ubiquitous blue and yellow striped Sabrett umbrella, under which you'll find New York's most iconic street food: the dirty water dog.", + "length": 220 + }, + { + "text": "Its original SoHo cart remains its most popular location, still slinging soft corn tacos cradling slow-cooked chipotle pork, hearty bowls of jalapeno cheddar grits and burritos packed with beer-battered fish, beans, rice and Monterey Jack cheese.", + "length": 246 + }, + { + "text": "Husband and wife owners Rafael and Reina Soler-Bermudez (\"Solber\" is a portmanteau of their last names) have been making the stuffed Salvadoran corn tortillas in their tiny mobile pupuseria for more than 15 years, selling more than 600 on a regular day.", + "length": 253 + }, + { + "text": "Now, its second cart in Midtown Manhattan peddles its famous falafel and shawarma to the masses, in addition to meaty plates like the Freddy's Junior: chicken, kefta and basmati rice topped with chopped onion and doused liberally in tahini and chile sauce.", + "length": 256 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.3588303178548813 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:45.481290957Z", + "first_section_created": "2025-12-23T09:34:45.481668273Z", + "last_section_published": "2025-12-23T09:34:45.482010587Z", + 
"all_results_received": "2025-12-23T09:34:45.58003005Z", + "output_generated": "2025-12-23T09:34:45.580267259Z", + "total_processing_time_ms": 98, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 98, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:45.481668273Z", + "publish_time": "2025-12-23T09:34:45.481898082Z", + "first_worker_start": "2025-12-23T09:34:45.482386802Z", + "last_worker_end": "2025-12-23T09:34:45.579068Z", + "total_journey_time_ms": 97, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:45.482357301Z", + "start_time": "2025-12-23T09:34:45.482457205Z", + "end_time": "2025-12-23T09:34:45.48258941Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:45.482577Z", + "start_time": "2025-12-23T09:34:45.482726Z", + "end_time": "2025-12-23T09:34:45.579068Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 96 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:45.482308999Z", + "start_time": "2025-12-23T09:34:45.482399302Z", + "end_time": "2025-12-23T09:34:45.482519407Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:45.482314199Z", + "start_time": "2025-12-23T09:34:45.482386802Z", + "end_time": "2025-12-23T09:34:45.482418303Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:45.481948384Z", + "publish_time": "2025-12-23T09:34:45.482010587Z", + "first_worker_start": "2025-12-23T09:34:45.482435304Z", + "last_worker_end": "2025-12-23T09:34:45.56162Z", + "total_journey_time_ms": 79, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:45.482506407Z", + "start_time": 
"2025-12-23T09:34:45.482671913Z", + "end_time": "2025-12-23T09:34:45.482693714Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:45.482665Z", + "start_time": "2025-12-23T09:34:45.482781Z", + "end_time": "2025-12-23T09:34:45.56162Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 78 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:45.482440104Z", + "start_time": "2025-12-23T09:34:45.482473605Z", + "end_time": "2025-12-23T09:34:45.482507407Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:45.482406603Z", + "start_time": "2025-12-23T09:34:45.482435304Z", + "end_time": "2025-12-23T09:34:45.482447104Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 174, + "min_processing_ms": 78, + "max_processing_ms": 96, + "avg_processing_ms": 87, + "median_processing_ms": 96, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, 
+ "average_section_size": 3084, + "slowest_section_id": 0, + "slowest_section_time_ms": 97 + } +} diff --git a/data/output/004bc0c56f47d2819e5e255389503691bb15cc4b.json b/data/output/004bc0c56f47d2819e5e255389503691bb15cc4b.json new file mode 100644 index 0000000..276e83f --- /dev/null +++ b/data/output/004bc0c56f47d2819e5e255389503691bb15cc4b.json @@ -0,0 +1,310 @@ +{ + "file_name": "004bc0c56f47d2819e5e255389503691bb15cc4b.txt", + "total_words": 618, + "top_n_words": [ + { + "word": "the", + "count": 21 + }, + { + "word": "and", + "count": 16 + }, + { + "word": "her", + "count": 16 + }, + { + "word": "i", + "count": 15 + }, + { + "word": "she", + "count": 15 + }, + { + "word": "was", + "count": 15 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "in", + "count": 12 + }, + { + "word": "bruton", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "au.", + "length": 3 + }, + { + "text": "com.", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "eventbrite.", + "length": 11 + }, + { + "text": "Emily Crane .", + "length": 13 + }, + { + "text": "It was pretty amazing.", + "length": 22 + }, + { + "text": "I was in heaps of pain.", + "length": 23 + }, + { + "text": "'I had a really bad day...", + "length": 26 + }, + { + "text": "I didn’t think it was for me.", + "length": 31 + }, + { + "text": "We came home and the car was there.", + "length": 35 + }, + { + "text": "For more information, visit jessiebruton.", + "length": 41 + }, + { + "text": "It’s been really hard to rely on others so much.", + "length": 50 + }, + { + "text": "'They have no idea what they’ve given me,' she said.", + "length": 54 + }, + { + "text": "I felt normal for the first time in 12 months,' she said.", + "length": 57 + }, + { + "text": "'Everything was in place and it turned our world upside down.", + "length": 61 + }, + { + "text": "But I rang a few people and no one knew anything,' Mrs Bruton said.", + "length": 67 + 
}, + { + "text": "'My husband and I went and had a coffee last night and I drove him.", + "length": 67 + }, + { + "text": "'I stared at it for ages, but when I jumped in I realised it had been converted for me.", + "length": 87 + }, + { + "text": "Her friends have organised a fundraiser at Wests Illawarra Leagues Club on Friday night.", + "length": 88 + }, + { + "text": "Jessie said she has had to rely on her husband Mick more than ever in the past 12 months .", + "length": 90 + }, + { + "text": "'Klay was our last baby, we’d bought a house, I'd finished my certificate III in fitness.", + "length": 91 + }, + { + "text": "Mrs Bruton said she would be 'forever grateful' and wished she could hug the anonymous donor.", + "length": 93 + }, + { + "text": "'At first I thought my friend Kylie was behind it because she’s been organising a fundraiser.", + "length": 95 + }, + { + "text": "We sat around out the front for ages before we checked if it was open,' Mrs Bruton told MailOnline.", + "length": 99 + }, + { + "text": "An anonymous donor dropped off a custom-fitted Nissan Pathfinder on the 27-year-old's front lawn last month .", + "length": 109 + }, + { + "text": "The mother of three found the white Nissan Pathfinder outside her house in Oak Flats, south of Wollongong, last month.", + "length": 118 + }, + { + "text": "'My husband Mick was crying, I was crying and my boys were running around trying to find DVDs to put in the car,' she said.", + "length": 123 + }, + { + "text": "Mrs Bruton, who had been driving for 10 years before she was left paralysed, passed her disability driving test on Tuesday.", + "length": 123 + }, + { + "text": "The Bruton's used the hand-over papers in the car to contact the seller but she wouldn’t reveal who the generous buyer was.", + "length": 125 + }, + { + "text": "Mrs Burton, her husband and three boys had to move back in with her parents while they raise money to modify their family home.", + "length": 127 + }, + { + "text": "The generous 
donor left a card inside the car saying they hoped the gift would make Mrs Bruton smile considering what she has faced.", + "length": 132 + }, + { + "text": "'Then I thought they did it out of the kindness of their heart so I shouldn’t dig around to find them if they don’t want me to,' she said.", + "length": 142 + }, + { + "text": "Jessie Bruton from Oak Flats lost feeling in both of her legs after she suffered nerve damage while she was pregnant with her third son nine months ago .", + "length": 153 + }, + { + "text": "Doctors are not sure if Ms Bruton will ever walk again unaided, but the former fitness instructor said she was doing as much physiotherapy as possible to help.", + "length": 159 + }, + { + "text": "Jessie Bruton, 27, lost feeling in both of her legs after pressure on her spine caused severe nerve damage while she was pregnant with her third son nine months ago.", + "length": 165 + }, + { + "text": "' The car had been fitted with hand controls and is big enough to accommodate her walking frame, wheelchairs and prams for her young sons Levi, 7, Nash, 2, and six-month-old Klay .", + "length": 180 + }, + { + "text": "' The car has been fitted with hand controls and is big enough to accommodate her walking frame, wheelchairs and prams for her young sons - Levi, 7, Nash, 2, and six-month-old Klay.", + "length": 181 + }, + { + "text": "A mother left paralysed after giving birth has arrived home to find a gleaming $45,000 Nissan Navara complete with a giant red bow parked on her front lawn and a note from an anonymous Good Samaritan.", + "length": 200 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7592332363128662 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:45.98316385Z", + "first_section_created": "2025-12-23T09:34:45.983513364Z", + "last_section_published": "2025-12-23T09:34:45.983691571Z", + "all_results_received": "2025-12-23T09:34:46.047115636Z", + "output_generated": 
"2025-12-23T09:34:46.047373746Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:45.983513364Z", + "publish_time": "2025-12-23T09:34:45.983691571Z", + "first_worker_start": "2025-12-23T09:34:45.984202492Z", + "last_worker_end": "2025-12-23T09:34:46.046208Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:45.984173891Z", + "start_time": "2025-12-23T09:34:45.984233393Z", + "end_time": "2025-12-23T09:34:45.984292096Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:45.984393Z", + "start_time": "2025-12-23T09:34:45.984544Z", + "end_time": "2025-12-23T09:34:46.046208Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:45.984175891Z", + "start_time": "2025-12-23T09:34:45.984223793Z", + "end_time": "2025-12-23T09:34:45.984308696Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:45.984141289Z", + "start_time": "2025-12-23T09:34:45.984202492Z", + "end_time": "2025-12-23T09:34:45.984230993Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 
61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3281, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/004bf66c91abad70ac266212045ac4a1f9e3b2d3.json b/data/output/004bf66c91abad70ac266212045ac4a1f9e3b2d3.json new file mode 100644 index 0000000..d509b2f --- /dev/null +++ b/data/output/004bf66c91abad70ac266212045ac4a1f9e3b2d3.json @@ -0,0 +1,370 @@ +{ + "file_name": "004bf66c91abad70ac266212045ac4a1f9e3b2d3.txt", + "total_words": 806, + "top_n_words": [ + { + "word": "the", + "count": 60 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "in", + "count": 19 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "coffee", + "count": 15 + }, + { + "word": "are", + "count": 14 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "is", + "count": 12 + }, + { + "word": "from", + "count": 11 + }, + { + "word": "to", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "vitamins and nutrition.", + "length": 23 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Another farmer compared .", + "length": 25 + }, + { + "text": "from boredom 
and depression.", + "length": 28 + }, + { + "text": "08:09 EST, 21 September 2013 .", + "length": 30 + }, + { + "text": "04:29 EST, 21 September 2013 .", + "length": 30 + }, + { + "text": "One farmer explained civets are .", + "length": 33 + }, + { + "text": "Once they are cleaned they are roasted.", + "length": 39 + }, + { + "text": "Caged: A civet cat on a farm in Indonesia.", + "length": 42 + }, + { + "text": "generally kept caged for around three years.", + "length": 44 + }, + { + "text": "However, undercover video footage shows civets .", + "length": 48 + }, + { + "text": "' Shop: Harrods said it was confident in its suppliers.", + "length": 55 + }, + { + "text": "This is carried out through strict auditing procedures.", + "length": 55 + }, + { + "text": "Confined: Cats are kept in tiny cages for most of the day .", + "length": 59 + }, + { + "text": "' The coffee has surged in popularity, especially in the U.", + "length": 59 + }, + { + "text": "There is no suggestion of animal cruelty on the company's estate.", + "length": 65 + }, + { + "text": "exhibiting neurotic behaviour such as incessant pacing, spinning and .", + "length": 70 + }, + { + "text": "The up-market shop sells the beans which can fetch more than £300 a kilo .", + "length": 75 + }, + { + "text": "head-bobbing – indications that the wild-caught animals are going insane .", + "length": 76 + }, + { + "text": "and Japan after featuring on the Oprah Winfrey Show and the film, The Bucket List.", + "length": 82 + }, + { + "text": "PETA UK Associate Director Mimi Bekhechi said farms which do enclosed civets were cruel.", + "length": 88 + }, + { + "text": "'We do not sell any of the coffee beans from the caged luwak as it is against our business model.", + "length": 97 + }, + { + "text": "' Let me out: Industry experts are convinced non-wild civet coffee has ended up on the London market .", + "length": 102 + }, + { + "text": "Frightened: The coffee beans are produced from the civet cats' excrement and 
can cost up to £60 a cup .", + "length": 104 + }, + { + "text": "No hope: This cat looks particularly unhappy as it peers into the camera lens of an animal cruelty investigator .", + "length": 113 + }, + { + "text": "Locals pick the ‘coffee beans’ from the faeces the cats which eat the ripe coffee berries as part of their diet.", + "length": 116 + }, + { + "text": "Dark secret: The final product is often sold as 'wild' - but these images prove the cats are anything but free-range .", + "length": 118 + }, + { + "text": "An investigation found the animals, which produce the sought-after kopi kuwak coffee, suffer in appalling conditions .", + "length": 118 + }, + { + "text": "Trapped: The animals are fed vast amounts of berries, which help produce the rich and distinctive flavour of the beans .", + "length": 120 + }, + { + "text": "Money spinner: Harrods is among the shops to sell the delicacy in London but it insists its product is entirely ethical .", + "length": 121 + }, + { + "text": "The coffee they produce is described as filled with a deep, mellow flavour, not acidic, with a ‘unique, soft, sweet taste’.", + "length": 127 + }, + { + "text": "The comapny said in a statement: 'In our caged civet cats programme, we study the animal behaviour, diet and its breeding behaviour.", + "length": 132 + }, + { + "text": "Distressed: Investigators from People for the Ethical Treatment of Animals found cats injured, pacing and showing signs of depression .", + "length": 135 + }, + { + "text": "'Our exclusive supplier… has given Harrods every assurance the coffee we are provided with is organic, and comes from wild palm civets.", + "length": 137 + }, + { + "text": "civets eating too many coffee berries to humans smoking, as the civets' health deteriorates greatly during captivity because of a lack of .", + "length": 139 + }, + { + "text": "Despite the battery farm conditions, many farms sell the product as 'wild', sourced in the jungle from the droppings of free-roaming 
animals.", + "length": 141 + }, + { + "text": "However, it said the product supplied to Harrods came only from free-range animals and animals were put inside only to study their behaviour.", + "length": 141 + }, + { + "text": "Workers said some civets were kept enclosed, despite initial denials from the company, which later backed down and admitted it DID use cages.", + "length": 141 + }, + { + "text": "Cramped: The coffee has become popular among the rich and famous - but these pictures shine new light on how the expensive delicacy is produced .", + "length": 145 + }, + { + "text": "'In order to sustain or meet the demand for this market, we breed our own civet cats and then release them in our farm when they are mature enough.", + "length": 147 + }, + { + "text": "Harrods said: 'Harrods works closely with all its suppliers to ensure the highest standards of ethical sourcing, production and trade are maintained.", + "length": 149 + }, + { + "text": "' In the wild, civets frequently climb trees to reach the ripe coffee berries, but in captivity, they are fed more of the fruit than would ever be natural for them.", + "length": 164 + }, + { + "text": "'Confining civet cats for years – as they go mad and lose their fur from the stress – for an expensive coffee would turn the stomach of any compassionate person.", + "length": 165 + }, + { + "text": "Confined to tiny, filthy cages and suffering from skin infections, this is how farmers are treating Asian civet cats which produce one of the most expensive coffees in the world.", + "length": 178 + }, + { + "text": "In a separate investigation by the BBC, it was found one of the most popular products called Wahana Luwak, which is supplied to Harrods, was being produced by a company, Sari Makmur, which uses caged civets.", + "length": 207 + }, + { + "text": "Tony Wild, former coffee trader and author of 'Coffee: A Dark History', told the BBC: 'The whole reason everybody regurgitates that story is that by being incredibly 
rare, you can keep a ridiculously high price.", + "length": 211 + }, + { + "text": "The shocking conditions were discovered by animal investigators on farms in Indonesia and the Philippines which produce the much sought-after kopi kuwak, sold in Harrods and which can cost up to £60 a cup in some restaurants.", + "length": 226 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8066933155059814 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:46.484456019Z", + "first_section_created": "2025-12-23T09:34:46.484840134Z", + "last_section_published": "2025-12-23T09:34:46.485085444Z", + "all_results_received": "2025-12-23T09:34:46.550557492Z", + "output_generated": "2025-12-23T09:34:46.551189117Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:46.484840134Z", + "publish_time": "2025-12-23T09:34:46.485085444Z", + "first_worker_start": "2025-12-23T09:34:46.485607265Z", + "last_worker_end": "2025-12-23T09:34:46.549669Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:46.485668668Z", + "start_time": "2025-12-23T09:34:46.485740771Z", + "end_time": "2025-12-23T09:34:46.485824274Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:46.485848Z", + "start_time": "2025-12-23T09:34:46.486012Z", + "end_time": "2025-12-23T09:34:46.549669Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:46.485645567Z", + "start_time": "2025-12-23T09:34:46.48571887Z", + "end_time": "2025-12-23T09:34:46.485841275Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": 
"wordcount", + "receive_time": "2025-12-23T09:34:46.485529662Z", + "start_time": "2025-12-23T09:34:46.485607265Z", + "end_time": "2025-12-23T09:34:46.485648567Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4842, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/004c3a845dfb1c3b39f8b6c49e181568515ac4ba.json b/data/output/004c3a845dfb1c3b39f8b6c49e181568515ac4ba.json new file mode 100644 index 0000000..6505a7f --- /dev/null +++ b/data/output/004c3a845dfb1c3b39f8b6c49e181568515ac4ba.json @@ -0,0 +1,282 @@ +{ + "file_name": "004c3a845dfb1c3b39f8b6c49e181568515ac4ba.txt", + "total_words": 360, + "top_n_words": [ + { + "word": "the", + "count": 15 + }, + { + "word": "bledsoe", + "count": 14 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "his", + "count": 10 + }, + { + "word": "he", + 
"count": 9 + }, + { + "word": "logan", + "count": 9 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "at", + "count": 7 + }, + { + "word": "to", + "count": 7 + }, + { + "word": "as", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "Mr.", + "length": 3 + }, + { + "text": "Mr.", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Jonathan Block .", + "length": 16 + }, + { + "text": "'How ya doin’' Mr.", + "length": 20 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "'I feel great,' Bledsoe said.", + "length": 29 + }, + { + "text": "Bledsoe said as he embraced Logan.", + "length": 34 + }, + { + "text": "It was more than I thought it would be.", + "length": 39 + }, + { + "text": "What he got was an awesome surprise: His father, a U.", + "length": 53 + }, + { + "text": "'I didn’t expect that much of a reaction from him ...", + "length": 55 + }, + { + "text": "Bledsoe has been serving in Afghanistan for nine months.", + "length": 56 + }, + { + "text": "He then picked up his son and they gave each other a huge hug.", + "length": 62 + }, + { + "text": "Army soldier deployed to Afghanistan, met him on center court.", + "length": 62 + }, + { + "text": "Logan (left) and Army Specialist Jamie Bledsoe talk with reporters after Mr.", + "length": 76 + }, + { + "text": "Army Specialist Jamie Bledsoe is a member of the California National Guard .", + "length": 76 + }, + { + "text": "Father and son immediately embraced, with Logan brought to tears at the surprise.", + "length": 81 + }, + { + "text": "Bledsoe said he was somewhat surprised at how emotional his son got after seeing him.", + "length": 85 + }, + { + "text": "Logan Bledsoe (right) sees his father, Army Specialist Jamie Bledsoe, who he immediately embraces .", + "length": 99 + }, + { + "text": "'It’s been really hard, just to live 
by myself with my mom, grandma and grandpa,' Logan told KTVK.", + "length": 100 + }, + { + "text": "The moment led to a standing ovation from the crowd as many fans shed tears at the emotional reunion.", + "length": 101 + }, + { + "text": "Logan Bledsoe, 8, went to Monday night's Phoenix Suns basketball match thinking he would see a great game.", + "length": 106 + }, + { + "text": "While his father has been serving abroad, Logan had chatted with his dad as often as possible through Skype.", + "length": 108 + }, + { + "text": "During a 'contest' at the game, Bledsoe was blindfolded and then lead to center court at the US Airways Center.", + "length": 111 + }, + { + "text": "Bledsoe, who spent nine months in Afghanistan, surprised his son at a Phoenix Suns basketball game Monday night .", + "length": 113 + }, + { + "text": "When he got there and removed the blindfold stood his dad, Specialist Jamie Bledsoe of the California National Guard .", + "length": 118 + }, + { + "text": "' The reunion was made possible by the Phoenix Suns and the Streets of New York, a local pizza chain that sponsors the 'Welcome Home Hero' campaign.", + "length": 148 + }, + { + "text": "Logan Bledsoe, 8, is blindfolded as he is lead to center court at the US Airways Center as part of a 'contest' Logan Bledsoe (right) is genuinely surprised the moment he takes off his blindfold and unexpectedly sees his father in front of him .", + "length": 244 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.406809538602829 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:46.986201106Z", + "first_section_created": "2025-12-23T09:34:46.988186887Z", + "last_section_published": "2025-12-23T09:34:46.988368094Z", + "all_results_received": "2025-12-23T09:34:47.057462788Z", + "output_generated": "2025-12-23T09:34:47.057680096Z", + "total_processing_time_ms": 71, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 69, + 
"aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:46.988186887Z", + "publish_time": "2025-12-23T09:34:46.988368094Z", + "first_worker_start": "2025-12-23T09:34:46.988827312Z", + "last_worker_end": "2025-12-23T09:34:47.056553Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:46.98877821Z", + "start_time": "2025-12-23T09:34:46.988827312Z", + "end_time": "2025-12-23T09:34:46.988868614Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:46.989047Z", + "start_time": "2025-12-23T09:34:46.989186Z", + "end_time": "2025-12-23T09:34:47.056553Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:46.988789811Z", + "start_time": "2025-12-23T09:34:46.988947217Z", + "end_time": "2025-12-23T09:34:46.989059922Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:46.988813512Z", + "start_time": "2025-12-23T09:34:46.988862414Z", + "end_time": "2025-12-23T09:34:46.988881415Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2058, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/004c50f7a89e2fd391fbbbc4f5cd1e15c3520fd7.json b/data/output/004c50f7a89e2fd391fbbbc4f5cd1e15c3520fd7.json new file mode 100644 index 0000000..19cfa2b --- /dev/null +++ b/data/output/004c50f7a89e2fd391fbbbc4f5cd1e15c3520fd7.json @@ -0,0 +1,548 @@ +{ + "file_name": "004c50f7a89e2fd391fbbbc4f5cd1e15c3520fd7.txt", + "total_words": 1140, + "top_n_words": [ + { + "word": "the", + "count": 84 + }, + { + "word": "to", + "count": 35 + }, + { + "word": "of", + "count": 28 + }, + { + "word": "in", + "count": 27 + }, + { + "word": "was", + "count": 24 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "s", + "count": 18 + }, + { + "word": "bin", + "count": 16 + }, + { + "word": "laden", + "count": 16 + }, + { + "word": "and", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "In .", + "length": 4 + }, + { + "text": "lair.", + "length": 5 + }, + { + "text": "Some U.", + "length": 7 + }, + { + "text": "reported.", + "length": 9 + }, + { + "text": 
"carried out.", + "length": 12 + }, + { + "text": "S Navy Seals .", + "length": 14 + }, + { + "text": "official said.", + "length": 14 + }, + { + "text": "half-a-dozen U.", + "length": 15 + }, + { + "text": "But he had been .", + "length": 17 + }, + { + "text": "The terror chief had .", + "length": 22 + }, + { + "text": "But was he in control?", + "length": 22 + }, + { + "text": "official told the Journal.", + "length": 26 + }, + { + "text": "‘That's possibly doable.", + "length": 26 + }, + { + "text": "Counter-terrorism staff from .", + "length": 30 + }, + { + "text": "on the 10th anniversary of 9/11.", + "length": 32 + }, + { + "text": "special forces earlier this year.", + "length": 33 + }, + { + "text": "Plans for an attack on the 9/11 .", + "length": 33 + }, + { + "text": "Meanwhile relations between the U.", + "length": 34 + }, + { + "text": "concrete leads from the documents.", + "length": 34 + }, + { + "text": "and Pakistan continue to go down hill.", + "length": 38 + }, + { + "text": "guys knew we had it and adapted,' a senior U.", + "length": 45 + }, + { + "text": "agencies have now raked over the entire bounty .", + "length": 48 + }, + { + "text": "President’s jet Air Force One during a trip abroad.", + "length": 53 + }, + { + "text": "Plot: Osama bin Laden had been planning to attack the U.", + "length": 56 + }, + { + "text": "The former leader of Al Qaeda wanted to shoot down the U.", + "length": 57 + }, + { + "text": "He also wanted to murder General David Petraeus, the top U.", + "length": 59 + }, + { + "text": "Other numbers scattered about the compound also led nowhere.", + "length": 60 + }, + { + "text": "anniversary were only in the discussion phase, according to U.", + "length": 62 + }, + { + "text": "significant American dates - July 4 and September 11 for example.", + "length": 65 + }, + { + "text": "who should be included in the attack team, the Wall Street Journal .", + "length": 68 + }, + { + "text": "already begun putting a team of 
militants together for the attack, .", + "length": 68 + }, + { + "text": "been strong evidence Bin Laden was all for carrying out attacks on .", + "length": 68 + }, + { + "text": "according to communications seized by Navy Seals from his Pakistani .", + "length": 69 + }, + { + "text": "collected from the raid, but have had little success extracting any .", + "length": 69 + }, + { + "text": "he was struggling to continue to hold on to the type of influence and .", + "length": 71 + }, + { + "text": "'The treasure trove has not led to any big takedowns, because the bad .", + "length": 71 + }, + { + "text": "‘We have so many small airports, you could fly below radar,’ he said.", + "length": 73 + }, + { + "text": "officials who added they hadn't seen any signs the plot was ready to be .", + "length": 73 + }, + { + "text": "those first days afterwards, intelligence officials confirmed there had .", + "length": 73 + }, + { + "text": "'What we found was that he was very isolated, and it is clearly the case .", + "length": 74 + }, + { + "text": "having disagreements with his operations chief Attiyah Abd al-Rahman, on .", + "length": 74 + }, + { + "text": "to direct operations in ways he may have been able to do in the past,' a .", + "length": 74 + }, + { + "text": "public phone centre in the tribal areas of Pakistan; the other was a dead end.", + "length": 78 + }, + { + "text": "General Petraeus would also be killed in a similar fashion, the documents show.", + "length": 79 + }, + { + "text": "Interestingly Bin Laden may have been losing his grip on control of the terror group.", + "length": 85 + }, + { + "text": "The CIA has met repeatedly with Pakistan's intelligence agency to try and smooth things over.", + "length": 93 + }, + { + "text": "commander in Afghanistan, and was still obsessed with using planes to carry out terror attacks.", + "length": 95 + }, + { + "text": "Celebration: Thousands took to the streets in New York City to celebrate the death of Bin Laden .", + 
"length": 97 + }, + { + "text": "officials believe certain fractions of the Pakistani government knew about Bin Laden's whereabouts.", + "length": 99 + }, + { + "text": "But this is the first time they have spoken of an actual attempt planned for this year's anniversary.", + "length": 101 + }, + { + "text": "Attack: Osama Bin Laden was planning to bring down Air Force One with President Barack Obama on board .", + "length": 103 + }, + { + "text": "Other intelligence gleaned from the Pakistan lair suggested Bin Laden was losing his grip over Al Qaeda .", + "length": 105 + }, + { + "text": "Historic: President Obama announces the death of bin Laden from the East Room of the White House on May 2.", + "length": 106 + }, + { + "text": "The two phone numbers that Bin Laden had sewn into his clothing at the time of his death led nowhere, they added.", + "length": 113 + }, + { + "text": "Every year since the initial attack two blue lights have shone in the place of the towers in memory of the victims .", + "length": 116 + }, + { + "text": "Target: Bin Laden was hoping to take out the presidential jet while it transported Barack Obama on a foreign visit .", + "length": 116 + }, + { + "text": "Ideally one of his followers would fly a jet into a major sporting event in America, leading to massive loss of life.", + "length": 117 + }, + { + "text": "Little yield: Other material gathered during the raid at the hideout in Abottabad has not led to many concrete leads .", + "length": 118 + }, + { + "text": "The Obama administration has said it has 'no regrets' about the raid despite relations with Pakistan at an all-time low .", + "length": 121 + }, + { + "text": "The Pakistani government are livid about the raid, claiming not to have known anything about it before it was carried out.", + "length": 122 + }, + { + "text": "The plot was one of a smattering of threats to come out of the materials seized during the raid in which Bin Laden was killed.", + "length": 126 + }, + { + 
"text": "Osama bin Laden was planning a terrorist outrage to mark the 10th anniversary of 9/11 with Barack Obama number one on his hit list.", + "length": 131 + }, + { + "text": "Ill feeling: The May 2 raid has caused a lot of anti-American feeling in Pakistan and has left relations between the two nations frosty .", + "length": 137 + }, + { + "text": "Former FBI agent Brad Garrett said that despite improvements in security the plan to hijack a small jet and fly it into a sports stadium could work.", + "length": 148 + }, + { + "text": "The Obama administration, worried about leaks within the Pakistani government, didn't want  to tell Islamabad before the raid, it has been reported.", + "length": 149 + }, + { + "text": "According to other materials in the treasure trove of documents found at his Abottabad lair during the May 2 raid, he was often ignored by his henchmen.", + "length": 152 + }, + { + "text": "The disclosures came from the ‘treasure trove’ of intelligence seized at the mansion hideout in Pakistan where bin Laden was killed during a raid by U.", + "length": 155 + }, + { + "text": "Intelligence: Documents seized in the raid indicated Bin Laden was keen to carry out future attacks on significant American dates, like July 4, left, and September 11, right .", + "length": 175 + }, + { + "text": "The haul of computers and handwritten journals recovered from the compound in Abottabad have taught CIA operatives more in the past weeks than they knew in the previous 10 years.", + "length": 178 + }, + { + "text": "The Obama administration announce last Sunday it was suspending $800million in aid to Pakistan, as a punishment for what it sees as not aiding the ongoing counter-terrorism threat.", + "length": 180 + }, + { + "text": "Information including phone numbers that could have led to other terrorist leaders were rendered useless almost as soon as they fell into the Government's hands, officials revealed.", + "length": 181 + }, + { + "text": "The plan was to 
shoot down Air Force one or Marine One, the President’s helicopter, whilst it was in mid-flight by firing a missile or rocket propelled grenade from another aircraft.", + "length": 184 + }, + { + "text": "Officials say that the attack he was planning was in the preliminary stage and that bin Laden had been discussing who would carry it out with his operations chief Attiyah Abd al-Rahman.", + "length": 185 + }, + { + "text": "' Although at the embryonic stage, the details of the plot lead CIA acting director Michael Morell to tell staff that one of their top priorities was to ensure such an attack never took place.", + "length": 192 + }, + { + "text": "The most recent meeting came today when Pakistan intelligence chief, Lt Gen Ahmad Shuja Pasha met with CIA acting Director Michael Morell to discus how the two nations could move forward in the fight against terrorism.", + "length": 218 + }, + { + "text": "Situation room: Barack Obama and Vice President Joe Biden, along with members of the national security team, and Secretary of State Hillary Clinton receive an update on the mission against bin Laden before he is killed by U.", + "length": 224 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.81515172123909 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:47.489132942Z", + "first_section_created": "2025-12-23T09:34:47.489509157Z", + "last_section_published": "2025-12-23T09:34:47.489870171Z", + "all_results_received": "2025-12-23T09:34:47.578276946Z", + "output_generated": "2025-12-23T09:34:47.578470954Z", + "total_processing_time_ms": 89, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 88, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:47.489509157Z", + "publish_time": "2025-12-23T09:34:47.489766467Z", + "first_worker_start": "2025-12-23T09:34:47.490285088Z", + "last_worker_end": "2025-12-23T09:34:47.558232Z", + 
"total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:47.49032569Z", + "start_time": "2025-12-23T09:34:47.490376692Z", + "end_time": "2025-12-23T09:34:47.490489096Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:47.490502Z", + "start_time": "2025-12-23T09:34:47.490661Z", + "end_time": "2025-12-23T09:34:47.558232Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:47.490289988Z", + "start_time": "2025-12-23T09:34:47.490439594Z", + "end_time": "2025-12-23T09:34:47.490553499Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:47.490221486Z", + "start_time": "2025-12-23T09:34:47.490285088Z", + "end_time": "2025-12-23T09:34:47.49033389Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:47.489801569Z", + "publish_time": "2025-12-23T09:34:47.489870171Z", + "first_worker_start": "2025-12-23T09:34:47.490360191Z", + "last_worker_end": "2025-12-23T09:34:47.577376Z", + "total_journey_time_ms": 87, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:47.49032859Z", + "start_time": "2025-12-23T09:34:47.490368991Z", + "end_time": "2025-12-23T09:34:47.490399493Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:47.490645Z", + "start_time": "2025-12-23T09:34:47.490786Z", + "end_time": "2025-12-23T09:34:47.577376Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 86 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:47.49032139Z", + "start_time": 
"2025-12-23T09:34:47.490360191Z", + "end_time": "2025-12-23T09:34:47.490399293Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:47.490319589Z", + "start_time": "2025-12-23T09:34:47.490363991Z", + "end_time": "2025-12-23T09:34:47.490382192Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 153, + "min_processing_ms": 67, + "max_processing_ms": 86, + "avg_processing_ms": 76, + "median_processing_ms": 86, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3275, + "slowest_section_id": 1, + "slowest_section_time_ms": 87 + } +} diff --git a/data/output/004c71c0874b0ec7b22efb92bae2a5ee1beff3d3.json b/data/output/004c71c0874b0ec7b22efb92bae2a5ee1beff3d3.json new file mode 100644 index 0000000..2920031 --- /dev/null +++ b/data/output/004c71c0874b0ec7b22efb92bae2a5ee1beff3d3.json @@ -0,0 +1,254 @@ +{ + "file_name": "004c71c0874b0ec7b22efb92bae2a5ee1beff3d3.txt", + "total_words": 536, + "top_n_words": [ + { + "word": 
"the", + "count": 42 + }, + { + "word": "of", + "count": 31 + }, + { + "word": "in", + "count": 19 + }, + { + "word": "to", + "count": 11 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "a", + "count": 8 + }, + { + "word": "isis", + "count": 7 + }, + { + "word": "after", + "count": 6 + }, + { + "word": "against", + "count": 6 + }, + { + "word": "army", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Above, a fanatic waves the group's flag through the streets of Raqqa in Syria .", + "length": 79 + }, + { + "text": "ISIS has lost control of crucial swathes of territory including Kobane in Syria.", + "length": 80 + }, + { + "text": "The men are among hundreds who joined the military after clerics pleaded for worshippers to volunteer .", + "length": 104 + }, + { + "text": "A group of Shiite men crawl through sand in the Najaf desert, south of Baghdad, after joining the Iraqi Military .", + "length": 114 + }, + { + "text": "In Syria however, militiamen and women were filmed celebrating after defeating ISIS militants in the fight for Kobane.", + "length": 118 + }, + { + "text": "The town's Yazidi population was all but obliterated last year in a humanitarian crisis which prompted UN intervention.", + "length": 119 + }, + { + "text": "Over the weekend 26 strikes were carried out across Iraq and eight in Syria by US forces as part of the ongoing campaign.", + "length": 121 + }, + { + "text": "ISIS militants are thought to control around a third of Iraq despite a sustained campaign of airstrikes against fanatics .", + "length": 122 + }, + { + "text": "Fighters speaking on behalf of extremists admitted they had been overpowered by Kurdish forces following months of bloodshed.", + "length": 125 + }, + { + "text": "While IS fighters have been driven out of the region, Kurdish troops remain under the threat of sniper fire, experts have warned.", + "length": 129 + }, + { + "text": "The men are 
among hundreds to have signed up to the military after answering a plea from Iraq's most senior Shiite cleric last year.", + "length": 132 + }, + { + "text": "Last year Iraqi Army officials said an increase in the number of men signing up to the army would help the country in its fight against terror .", + "length": 144 + }, + { + "text": "Their faces and torsos smeared with dirt, a handful of Shiite Muslims crawl along desert sand under the watchful nose of their superior's rifles.", + "length": 145 + }, + { + "text": "The Shiite men join hundreds on the front line battling against fanatics, with Kurdish forces in the north also waging war against ISIS militants .", + "length": 147 + }, + { + "text": "In Sinjar, one of the most heated patches of conflict, Kurdish forces fought off extremists' unrelenting gunfire and strikes after months of battle.", + "length": 148 + }, + { + "text": "It is the latest in a string of blows to the fanatical regime which has also murdered Western and Japanese journalists and aid workers in the past year.", + "length": 152 + }, + { + "text": "Hundreds flocked to military centres across Baghdad, with Iraqi Army officials confident the boost in soldiers would help regain crucial swathes of territory.", + "length": 158 + }, + { + "text": "According to the former mayor of the neighbouring town of Tel Afar, civilians are 'ready' to join the Iraqi Army in its campaign to take back control of the city.", + "length": 162 + }, + { + "text": "In June, grand ayatollah Ali al-Sistani pleaded with youngsters to volunteer their services in the wake of crippling attacks against the religion by ISIS fighters.", + "length": 163 + }, + { + "text": "In the Najaf desert south of Baghdad, one group of volunteers were seen jumping through pits of fire and hurling themselves from sand dunes as part of the training.", + "length": 164 + }, + { + "text": "It is one of many gruelling drills they will endure after joining the Iraqi Army in its fight against 
bloodthirsty ISIS fanatics wreaking terror across the country.", + "length": 164 + }, + { + "text": "Since then extremists have faced increased opposition, fighting off Kurdish militias as well as the the army all the while under the threat of airstrike by US and UK led coalitions.", + "length": 181 + }, + { + "text": "Mosul, the country's second largest city and an ISIS stronghold, is primed for retaking, it was claimed earlier this week, with residents exasperated with life under the rule of fanatics.", + "length": 187 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8003047704696655 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:47.990616418Z", + "first_section_created": "2025-12-23T09:34:47.992415691Z", + "last_section_published": "2025-12-23T09:34:47.9926419Z", + "all_results_received": "2025-12-23T09:34:48.057462421Z", + "output_generated": "2025-12-23T09:34:48.057636228Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:47.992415691Z", + "publish_time": "2025-12-23T09:34:47.9926419Z", + "first_worker_start": "2025-12-23T09:34:47.99313152Z", + "last_worker_end": "2025-12-23T09:34:48.056571Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:47.993115519Z", + "start_time": "2025-12-23T09:34:47.993186222Z", + "end_time": "2025-12-23T09:34:47.993284426Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:47.993295Z", + "start_time": "2025-12-23T09:34:47.993448Z", + "end_time": "2025-12-23T09:34:48.056571Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:47.993076218Z", 
+ "start_time": "2025-12-23T09:34:47.99313482Z", + "end_time": "2025-12-23T09:34:47.993202823Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:47.993068217Z", + "start_time": "2025-12-23T09:34:47.99313152Z", + "end_time": "2025-12-23T09:34:47.993179722Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3204, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/004c87c9f13979629299f0a9fa1aa0fc797b9e71.json b/data/output/004c87c9f13979629299f0a9fa1aa0fc797b9e71.json new file mode 100644 index 0000000..90d97ba --- /dev/null +++ b/data/output/004c87c9f13979629299f0a9fa1aa0fc797b9e71.json @@ -0,0 +1,266 @@ +{ + "file_name": "004c87c9f13979629299f0a9fa1aa0fc797b9e71.txt", + "total_words": 515, + "top_n_words": [ + 
{ + "word": "she", + "count": 17 + }, + { + "word": "and", + "count": 16 + }, + { + "word": "the", + "count": 16 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "that", + "count": 9 + }, + { + "word": "was", + "count": 9 + }, + { + "word": "sargent", + "count": 7 + }, + { + "word": "2012", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Amanda Williams .", + "length": 17 + }, + { + "text": "' Judge Glenn said: 'This was not a trivial amount.", + "length": 51 + }, + { + "text": "'That benefit would have been dishonest from the outset.", + "length": 56 + }, + { + "text": "And she claimed housing and council tax benefit from August 2009 to 2011.", + "length": 73 + }, + { + "text": "'Honest taxpayers are sick and tired of people like you taking their money.", + "length": 75 + }, + { + "text": "'You did not declare that you were living as husband and wife until March 2012.", + "length": 79 + }, + { + "text": "'Benefit cheats take away public money which would otherwise be spent on good causes.", + "length": 85 + }, + { + "text": "But Stoke-on-Trent Crown Court heard she was in fact living with a man whom she later married.", + "length": 94 + }, + { + "text": "'She was under an obligation to notify any change of circumstances if she co-habitated or married.", + "length": 98 + }, + { + "text": "During the hearing the judge told the mother that hardworking taxpayers were 'sick and tired of people like you.", + "length": 112 + }, + { + "text": "Hannah Sargent, 29, told the authorities she was a lone parent and claimed thousands between August 2009 and March 2012 .", + "length": 121 + }, + { + "text": "The mother-of-one claimed income support from July 2006 until July 2011 as well as housing, council and child tax benefits.", + "length": 123 + }, + { + "text": "Despite denying that she had been living with him until then, investigators 
found she had been overpaid a total of £27,165.", + "length": 124 + }, + { + "text": "' Hannah Sargent, 29, told the authorities she was a single parent and claimed thousands between August 2009 and March 2012.", + "length": 124 + }, + { + "text": "'She also claimed child tax credits between August 2009 and March 2012 and working tax credits from April 2011 to March 2012.", + "length": 125 + }, + { + "text": "But Stoke-on-Trent Crown Court heard that Sargent (pictured outside court) was in fact living with a man whom she later married .", + "length": 129 + }, + { + "text": "Judge Paul Glenn said the offending was serious enough to merit custody but that Sargent's  daughter would lose out if she was jailed.", + "length": 135 + }, + { + "text": "' Arif Hussain defending Sargent, who has no previous convictions, said she is voluntarily repaying the money at a rate of £105 a month.", + "length": 137 + }, + { + "text": "'We recognise it is only a minority exploiting the system but would encourage anyone with information relating to tax credit fraud to call.", + "length": 139 + }, + { + "text": "Instead she was sentenced to four months in prison, suspended for 18 months, ordered to undertake 180 hours unpaid work and pay £250 costs.", + "length": 140 + }, + { + "text": "Prosecutor Joanne Wallbanks said: 'In July 2011, the benefit changed to Jobseeker’s Allowance, until October 2011, when she became employed.", + "length": 142 + }, + { + "text": "A benefits cheat who stole £27,000 in handouts has been spared jail, despite a furious judge telling her: 'Taxpayers are sick of people like you.", + "length": 146 + }, + { + "text": "During the hearing Judge Paul Glenn told the mother that hardworking taxpayers were 'sick and tired of people like you' 'That would have been fraudulent from the start.", + "length": 168 + }, + { + "text": "' The court heard Sargent, from Chesterton, Stoke-on-Trent, Staffordshire, notified HM Revenue and Customs (HMRC) on March 9, 2012 that her 
partner had moved in with her.", + "length": 170 + }, + { + "text": "She admitted two offences of dishonestly failing to notify a change of circumstances, making a false representation to obtain benefits and two counts of fraudulently obtaining tax credits.", + "length": 188 + }, + { + "text": "After the case Rosemary Phillips, of HMRC, said: 'Sargent lied on numerous occasions to continue to receive benefits she wasn’t entitled to, pocketing money needed to fund public services.", + "length": 190 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8972035050392151 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:48.493416148Z", + "first_section_created": "2025-12-23T09:34:48.493788163Z", + "last_section_published": "2025-12-23T09:34:48.493986971Z", + "all_results_received": "2025-12-23T09:34:48.555322251Z", + "output_generated": "2025-12-23T09:34:48.555490358Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:48.493788163Z", + "publish_time": "2025-12-23T09:34:48.493986971Z", + "first_worker_start": "2025-12-23T09:34:48.494473391Z", + "last_worker_end": "2025-12-23T09:34:48.554442Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:48.494510792Z", + "start_time": "2025-12-23T09:34:48.494582395Z", + "end_time": "2025-12-23T09:34:48.494652498Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:48.494715Z", + "start_time": "2025-12-23T09:34:48.494861Z", + "end_time": "2025-12-23T09:34:48.554442Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:48.494473691Z", + "start_time": 
"2025-12-23T09:34:48.494536694Z", + "end_time": "2025-12-23T09:34:48.494615397Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:48.494410788Z", + "start_time": "2025-12-23T09:34:48.494473391Z", + "end_time": "2025-12-23T09:34:48.494503892Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3068, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/004cc1a4ecd2ce6327d67b801756847063e48c28.json b/data/output/004cc1a4ecd2ce6327d67b801756847063e48c28.json new file mode 100644 index 0000000..7b9af51 --- /dev/null +++ b/data/output/004cc1a4ecd2ce6327d67b801756847063e48c28.json @@ -0,0 +1,412 @@ +{ + "file_name": "004cc1a4ecd2ce6327d67b801756847063e48c28.txt", + "total_words": 989, + "top_n_words": [ + { + "word": 
"the", + "count": 52 + }, + { + "word": "to", + "count": 32 + }, + { + "word": "a", + "count": 23 + }, + { + "word": "and", + "count": 23 + }, + { + "word": "postal", + "count": 23 + }, + { + "word": "of", + "count": 21 + }, + { + "word": "that", + "count": 20 + }, + { + "word": "service", + "count": 18 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "as", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": ".", + "length": 1 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "R.", + "length": 2 + }, + { + "text": "mail.", + "length": 5 + }, + { + "text": "Rubio .", + "length": 7 + }, + { + "text": "10 an hour.", + "length": 11 + }, + { + "text": "Downsizing the U.", + "length": 17 + }, + { + "text": "Who were my co-workers?", + "length": 23 + }, + { + "text": "1351, introduced by Rep.", + "length": 24 + }, + { + "text": "A good start would be H.", + "length": 24 + }, + { + "text": "An alternative to this loss?", + "length": 28 + }, + { + "text": "Their jobs mattered to communities.", + "length": 35 + }, + { + "text": "But that network is underpinned by the U.", + "length": 41 + }, + { + "text": "For one thing, the postal service has been a huge employer.", + "length": 59 + }, + { + "text": "But they were also members of extended families and community networks.", + "length": 71 + }, + { + "text": "The opinions expressed in this commentary are solely those of Philip F.", + "length": 71 + }, + { + "text": "Above all, postal workers were proud of having a career serving the public.", + "length": 75 + }, + { + "text": "Postal jobs have especially played a key role in black community development.", + "length": 77 + }, + { + "text": "People able to consume goods that others produce to help drive local economies.", + "length": 79 + }, + { + "text": "The post office has long been one of the largest employers of African-Americans.", + "length": 80 + }, + { + "text": "centuries since the founding of this 
country (the post office was started in 1775).", + "length": 83 + }, + { + "text": "95 an hour and having to work a second job or collect food stamps to make ends meet.", + "length": 84 + }, + { + "text": "Today the nation relies on a vast mailing industry that operates primarily for profit.", + "length": 86 + }, + { + "text": "Before I became a history professor I carried mail for the Postal Service for 20 years.", + "length": 87 + }, + { + "text": "Many don't make the connection that e-commerce not only competes with but also generates U.", + "length": 91 + }, + { + "text": "5 billion a year out of operating funds to satisfy this unnecessary and devastating mandate.", + "length": 92 + }, + { + "text": "But postal workers are people we depend on and post offices are places we want to know will always be there.", + "length": 108 + }, + { + "text": "We lose more than just people committed to providing service, but also people engaged with their communities.", + "length": 109 + }, + { + "text": "As with many government jobs, you're hired for this one based on achieving a high score on a competitive exam.", + "length": 110 + }, + { + "text": "And that pre-fund requirement ultimately needs to be repealed to keep the Postal Service from running off the rails.", + "length": 116 + }, + { + "text": "We lose more than numbers when we lose postal jobs and post offices, or even the existence of a universal postal service.", + "length": 121 + }, + { + "text": "Just everyday people who, like me in 1980, were attracted to a job that had good benefits, job security, and started at $8.", + "length": 123 + }, + { + "text": "Bush, which forces the Postal Service to pre-fund its retiree health benefits 75 years into the future over the next 10 years.", + "length": 126 + }, + { + "text": "Or that the USPS came up with the concept of overnight mail and zip codes that UPS and FedEx rely on so heavily in their business.", + "length": 130 + }, + { + "text": "Even as they faced 
discrimination at other jobs, many found work there with college degrees or military service under their belts.", + "length": 130 + }, + { + "text": "Many started small businesses on the side, adopted foster children, were active in civic organizations, or enrolled in college classes.", + "length": 135 + }, + { + "text": "The job allowed many to move into the ranks of middle-class wage earners, where they were able to buy homes and send their children to college.", + "length": 143 + }, + { + "text": "This was as a result of the 1970 nationwide postal wildcat strike that began in New York after postal workers declared they were tired of earning $2.", + "length": 149 + }, + { + "text": "In collecting oral histories for a book I later wrote on the postal service, I interviewed those who had worked before 1970, including those who struck.", + "length": 152 + }, + { + "text": "Or that the post office is the victim of an artificial deficit created by the 2006 Postal Accountability and Enhancement Act, signed by President George W.", + "length": 155 + }, + { + "text": "Postal Service -- a self-supporting quasi-corporate government agency that remains committed to universal service by constitutional and congressional mandate.", + "length": 158 + }, + { + "text": "What should have been annual revenue surpluses for the Postal Service over the last decade have instead contributed to nightmare annual deficits as it is forced to pay $5.", + "length": 171 + }, + { + "text": "Stephen Lynch, D-Massachusetts, a bill that would at least allow the Postal Service to transfer surplus pension funds to satisfy the retiree health plan pre-fund requirement.", + "length": 174 + }, + { + "text": "Veterans, roughly 20% of today's postal workforce (though once well over 50%) earn extra points on this exam, thus giving them a head start and a job to come home to after military service.", + "length": 189 + }, + { + "text": "Many Americans may not realize that it was the Post Office that 
pioneered parcel post in 1916 in response to the overpriced, poor, and inconsistent service disaster that was private package delivery.", + "length": 199 + }, + { + "text": "(CNN) -- What would we lose if we lost 220,000 postal jobs (120,000 proposed through layoffs, 100,000 through attrition), 3,700 post offices, 300 mail processing plants, or even the post office itself?", + "length": 201 + }, + { + "text": "By 1970, they had become twice as likely as whites to work for the post office, and even before the wage bump that year, the job had afforded them a middle-class status and the ability to accumulate wealth.", + "length": 206 + }, + { + "text": "S Postal Service - -which is so low on money, it's in imminent danger of default -- may seem like a ripple in this troubled economy, but it promises to be a social tsunami if action isn't taken soon to save it.", + "length": 210 + }, + { + "text": "The postal worker's job could include processing mail as clerks and mail handlers, delivering it as letter carriers, driving it as truck drivers, and as maintenance workers keeping up the vehicles, buildings, and grounds.", + "length": 221 + }, + { + "text": "With millions of jobs and businesses lost to the recent recession, these may seem like just more numbers, or more seemingly inevitable \"facts\" -- that in the electronic age we now rely on the private sector to deliver public services.", + "length": 234 + }, + { + "text": "People could demand that Congress treat the Postal Service as a venerable American institution worthy of fulfilling its enduring mandate, for which it has recruited generations of skilled and dedicated professional government employees.", + "length": 236 + }, + { + "text": "Or that during the turn of this century -- the Postal Service's peak years of revenue and mail handling -- it was common to hear competitors and political ideologues calling for the agency's privatization, while at the same time blocking USPS innovations like the proposed 1997 
Global Postal Link program to help expedite parcels through customs.", + "length": 346 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5147998780012131 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:48.994592313Z", + "first_section_created": "2025-12-23T09:34:48.994911025Z", + "last_section_published": "2025-12-23T09:34:48.995240139Z", + "all_results_received": "2025-12-23T09:34:49.108617523Z", + "output_generated": "2025-12-23T09:34:49.108764429Z", + "total_processing_time_ms": 114, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 113, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:48.994911025Z", + "publish_time": "2025-12-23T09:34:48.995164436Z", + "first_worker_start": "2025-12-23T09:34:48.995602653Z", + "last_worker_end": "2025-12-23T09:34:49.07749Z", + "total_journey_time_ms": 82, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:48.995823662Z", + "start_time": "2025-12-23T09:34:48.995889265Z", + "end_time": "2025-12-23T09:34:48.99600797Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:48.996144Z", + "start_time": "2025-12-23T09:34:48.996303Z", + "end_time": "2025-12-23T09:34:49.07749Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 81 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:48.995825262Z", + "start_time": "2025-12-23T09:34:48.995892965Z", + "end_time": "2025-12-23T09:34:48.99601517Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:48.99553125Z", + "start_time": "2025-12-23T09:34:48.995602653Z", + "end_time": "2025-12-23T09:34:48.995641755Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 
1, + "creation_time": "2025-12-23T09:34:48.995195937Z", + "publish_time": "2025-12-23T09:34:48.995240139Z", + "first_worker_start": "2025-12-23T09:34:48.995869764Z", + "last_worker_end": "2025-12-23T09:34:49.104187Z", + "total_journey_time_ms": 108, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:48.995962568Z", + "start_time": "2025-12-23T09:34:48.995996369Z", + "end_time": "2025-12-23T09:34:48.99601567Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:48.996151Z", + "start_time": "2025-12-23T09:34:48.996283Z", + "end_time": "2025-12-23T09:34:49.104187Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 107 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:48.995880365Z", + "start_time": "2025-12-23T09:34:48.995908566Z", + "end_time": "2025-12-23T09:34:48.995931867Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:48.995832463Z", + "start_time": "2025-12-23T09:34:48.995869764Z", + "end_time": "2025-12-23T09:34:48.995878165Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 188, + "min_processing_ms": 81, + "max_processing_ms": 107, + "avg_processing_ms": 94, + "median_processing_ms": 107, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2885, + "slowest_section_id": 1, + "slowest_section_time_ms": 108 + } +} diff --git a/data/output/004d222ee2aeb81a6b842c3154ef2b305455974a.json b/data/output/004d222ee2aeb81a6b842c3154ef2b305455974a.json new file mode 100644 index 0000000..eeda12d --- /dev/null +++ b/data/output/004d222ee2aeb81a6b842c3154ef2b305455974a.json @@ -0,0 +1,596 @@ +{ + "file_name": "004d222ee2aeb81a6b842c3154ef2b305455974a.txt", + "total_words": 1713, + "top_n_words": [ + { + "word": "the", + "count": 88 + }, + { + "word": "her", + "count": 51 + }, + { + "word": "she", + "count": 49 + }, + { + "word": "and", + "count": 43 + }, + { + "word": "to", + "count": 40 + }, + { + "word": "a", + "count": 38 + }, + { + "word": "in", + "count": 28 + }, + { + "word": "upham", + "count": 26 + }, + { + "word": "of", + "count": 25 + }, + { + "word": "police", + "count": 25 + } + ], + "sorted_sentences": [ + { + "text": "Wise.", + "length": 5 + }, + { + "text": "Funny.", + "length": 6 + }, + { + "text": "Spiritual.", + "length": 10 + }, + { + "text": "And hopeful.", + "length": 12 + }, + { + "text": "Too much darkness.", + "length": 18 + }, + { + "text": "And had known enemies.", + "length": 22 + }, + { + "text": "Not when its too late.", + "length": 22 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "And about how she died.", + "length": 23 + }, + { + "text": "I feel so sad about Misty.", + "length": 26 + }, + { + "text": "I know she has more to say.", + "length": 27 + }, + { + "text": "'There's a lot of distrust.", + "length": 27 + }, + { + "text": "' 
Speaking to HollywoodLife.", + "length": 28 + }, + { + "text": "Police must do an investigation.", + "length": 32 + }, + { + "text": "Later, Lewis added: 'I cant sleep.", + "length": 34 + }, + { + "text": "I will remember her as tuff [sic].", + "length": 34 + }, + { + "text": "It is this presence that will continue.", + "length": 39 + }, + { + "text": "'She doesn't believe in killing herself.", + "length": 40 + }, + { + "text": "Bcuz she was vocal #ThisIsNOTaSUICIDE '.", + "length": 40 + }, + { + "text": "She had a presence everyone saw and felt.", + "length": 41 + }, + { + "text": "I pray the police do a murder investigation.", + "length": 44 + }, + { + "text": "She fought for the voiceless feircely [sic].", + "length": 44 + }, + { + "text": "'She understood suffering and the deepest pain.", + "length": 47 + }, + { + "text": "'It took about five-and-a-half hours to confirm.", + "length": 48 + }, + { + "text": "They're saying not 'foul play' when ofcourse it is.", + "length": 51 + }, + { + "text": "' 'We are now just waiting on the coroner's report.", + "length": 51 + }, + { + "text": "She attributed it to her Native-American background.", + "length": 52 + }, + { + "text": "'Out of the dark space in which her light got dimmed.", + "length": 53 + }, + { + "text": "Her shoes and purse were found at the top of the cliff.", + "length": 55 + }, + { + "text": "She believes that is a sin and she would never do that.", + "length": 55 + }, + { + "text": "They insist she had been talking about moving back to LA.", + "length": 57 + }, + { + "text": "'The family stated they do not feel she committed suicide.", + "length": 58 + }, + { + "text": "'It wasn't like she just decided to take time off,' he said.", + "length": 60 + }, + { + "text": "He denied his daughter would ever go through with a suicide.", + "length": 60 + }, + { + "text": "She recently played a house keeper in 'August: Osage County.", + "length": 60 + }, + { + "text": "Upham may have been under a lot of stress 
prior to her death.", + "length": 61 + }, + { + "text": "Her father previously said he did not believe she was suicidal.", + "length": 63 + }, + { + "text": "The family has stated that her death is most likely an accident.", + "length": 64 + }, + { + "text": "RIPMistyUpham' It took just over five hours to identify the body.", + "length": 65 + }, + { + "text": "'Misty spoke out alot against injustices within Native community.", + "length": 65 + }, + { + "text": "embankment where her body was found and accidentally fell off a cliff.", + "length": 70 + }, + { + "text": "'This is another great reminder to call and check on someone when you think to.", + "length": 79 + }, + { + "text": "It was her uncle, Roberth Upham, who organized the search party that found Misty.", + "length": 81 + }, + { + "text": "' He added the family was certain she fell or else was the subject of 'foul play'.", + "length": 82 + }, + { + "text": "' Her next onscreen role will be in 'Cake' with Jennifer Aniston and Anna Kendrick.", + "length": 83 + }, + { + "text": "Upham's family has also blasted claims of a suicide, claiming she fell to her death.", + "length": 84 + }, + { + "text": "Last night, it was Mr Upham who had to identify the lifeless body of his own daughter.", + "length": 86 + }, + { + "text": "Then we gathered around her body to say prayers and now we're all gathered at the church.", + "length": 89 + }, + { + "text": "He added: 'She has said suicidal things in the past but never followed through,' he said.", + "length": 89 + }, + { + "text": "'We waited at the site where the team found the body,' Rector told The Hollywood Reporter.", + "length": 90 + }, + { + "text": "'Her uncle found her, not police, and her purse was near the cliff where she fell to her death.", + "length": 95 + }, + { + "text": "She tweeted about her August: Osage County co-star: '#RIPMistyUpham I am in shock and greif [sic].", + "length": 98 + }, + { + "text": "Tragedy: The dead body of Misty Upham, 32, who was 
missing since October 5, was found on Thursday .", + "length": 99 + }, + { + "text": "Missing: Upham's parents had been canvassing their town and hanging 'missing' posters for 12 days .", + "length": 99 + }, + { + "text": "'I think if Misty was different-looking they would be doing a lot more to help find her,' she said.", + "length": 99 + }, + { + "text": "Officers have declared there is 'no evidence of foul play' as they continue to investigate the death.", + "length": 101 + }, + { + "text": "'Misty told me about horrible violence she suffered on NA reservations She felt she coul be murdered!", + "length": 101 + }, + { + "text": "' Asked if family and friends could trust the coroner's report, she responded: 'The family has concerns.", + "length": 104 + }, + { + "text": "Calling for action: She warned Misty feared 'she could be murdered' and had suffered horrible violence .", + "length": 104 + }, + { + "text": "Accolade: They are pictured sharing a moment here after receiving the Ensemble Cast Award for the movie .", + "length": 105 + }, + { + "text": "com, Mona Upham, Misty's mother, accused the Seattle area police of not doing enough to find her daughter.", + "length": 106 + }, + { + "text": "' Others close to the family of the Native American have said she could have been a victim of 'foul play'.", + "length": 106 + }, + { + "text": "Friends: The pair starred together in August: Osage County last year and remained firm friends afterwards .", + "length": 107 + }, + { + "text": "' He said the actress, a member of the Blackfoot tribe, had cut off her long locks before she went missing.", + "length": 107 + }, + { + "text": "And that's founded in the historical trauma experienced by the native community at the hands of the police.", + "length": 107 + }, + { + "text": "Police received a suicide call on October 5, but when officers arrived, she was no longer at her father's house.", + "length": 112 + }, + { + "text": "' Tribute: Juliette Lewis posted a picture on 
Instagram with Misty saying she fought for the voiceless fiercely .", + "length": 113 + }, + { + "text": "A day later, her father went to the police asking that his daughter be listed as a 'missing and endangered person.", + "length": 114 + }, + { + "text": "Movies: Upham recently played a house keeper in 'August: Osage County' which starred Meryl Streep and Julia Roberts.", + "length": 116 + }, + { + "text": "'She had her purse with her and because she had her purse and phone the police thought she left of her own will,' he said.", + "length": 122 + }, + { + "text": "Upham had recently returned to the Seattle-area to help care for her father, Charles Upham, who suffered a stroke last year.", + "length": 124 + }, + { + "text": "But today Juliette Lewis took to Twitter blasting the task force's announcement, warning: 'police must do an investigation'.", + "length": 124 + }, + { + "text": "People magazine reported that in the last year, police have responded to Upham's apartment four times following suicide calls.", + "length": 126 + }, + { + "text": "Auburn police had responded to a suicide call at Misty Upham's residence in the days before she was last seen, authorities said.", + "length": 128 + }, + { + "text": "He told KIRO-FM that Misty was in a 'psychotic' state when she left her house with only the clothes on her back and without her medication.", + "length": 139 + }, + { + "text": "Upham is best known for her role in the 2008 independent film 'Frozen River,' for which she received an Independent Spirit Award nomination.", + "length": 140 + }, + { + "text": "It is only her family searching right now, hanging posters, checking with shelters, hospitals, and asking people in the area if she has been seen.", + "length": 146 + }, + { + "text": "A spokesman for the Muckleshoot reservation did not immediately comment on Misty's death or allegations city police did not do enough to look for her.", + "length": 150 + }, + { + "text": "In a Facebook post on Upham's page, 
the family stated: 'We have been consumed by chasing down leads and doing ground searches in the area she was last seen.", + "length": 156 + }, + { + "text": "Therefore, the Upham family took to Facebook and local media to alert the public about their missing daughter and to enlist the public's help in finding her.", + "length": 157 + }, + { + "text": "Shortly after her disappearance, Upham told KIRO-FM that his daughter was upset and erratic and had stopped taking medication for anxiety and bipolar disorder.", + "length": 160 + }, + { + "text": "' She then retweeted a series of Upham's own tweets, including a warning for people to 'walk on egg shells' because she had 'just been followed by Indian Trader'.", + "length": 162 + }, + { + "text": "We are waiting to hear what is stated in the coroner's report but there is a long history of police harassment between the Auburn police and the native community.", + "length": 162 + }, + { + "text": "This was not the first time Upham has reportedly left her home without notice, and it is also not the first time police have responded to suicide calls at her residence.", + "length": 169 + }, + { + "text": "'The family is concerned that if the police had actually taken their concerns seriously within those first few hours of the report that perhaps she would have been found.", + "length": 170 + }, + { + "text": "A source, who would only call himself Harry, told Mail Online he believes the police in Auburn, Washington are 'derelict' in their duty after she went missing for 12 days.", + "length": 171 + }, + { + "text": "Lewis reiterated his sentiment by posting a picture of herself and Misty on Instagram with the caption: 'Misty Upham survived many things that many don't early on in life.", + "length": 171 + }, + { + "text": "Relatives, who have gathered together in Auburn, Washington, to grieve also believe the 32-year-old could have been found alive if police had 'taken their concerns seriously'.", + "length": 175 + }, + { + 
"text": "The family of Misty Upham have said they were forced to set up their own search party when the Django Unchained actress went missing after police 'did not help the investigation'.", + "length": 179 + }, + { + "text": "' The police did not comply, saying that Upham's case did not meet the criteria because her disappearance was not 'unexplainable, involuntary or suspicious,' according to USA Today.", + "length": 181 + }, + { + "text": "The Hollywood Reporter spoke with Seattle-based filmmaker Tracy Rector who said that Misty Upham's uncle, Robert Upham, organized a search party to look for the missing actress three days ago.", + "length": 192 + }, + { + "text": "A spokesman for the family released a statement on Friday evening saying: 'The family wants to make it clear the Auburn police did not help in the investigation or the finding of Misty at all.", + "length": 193 + }, + { + "text": "But she had won an award for her role in the 2008 independent film 'Frozen River' A source reportedly told People that Upham was struggling financially and sometimes had trouble paying her rent.", + "length": 194 + }, + { + "text": "' Mr Upham told the MailOnline late Wednesday that he was frantic to find his daughter and felt that media attention had focused on her glamorous days on the red carpet and not her as a 'normal person.", + "length": 201 + }, + { + "text": "'And you know, Misty has also experienced harassment at the hands of police so you know, the family is concerned about the circumstances surrounding what happened and why police chose not look for Misty.", + "length": 203 + }, + { + "text": "'And they've also stated that they feel ais if their concerns weren't listed to, and when they pleaded for help this perhaps could have been prevented and her body could have been found in time after she fell.", + "length": 209 + }, + { + "text": "It follows claims from her August: Osage County co-star Juliette Lewis that the 32-year-old Native American told her she 
'feared she was being targeted' while others believe her death was as a result of 'foul play'.", + "length": 215 + }, + { + "text": "Upham, 32, a Native American actress praised for her roles in movies including August: Osage County, Frozen River and Django Unchained, was missing for 12 days before police found her body at the bottom of a wooded embankment in Auburn, Washington.", + "length": 248 + }, + { + "text": "Frustration: Her family (Charles her father pictured left) claim they had to set up their own search party because the police 'did not help the investigation' Despair: Her father posted on Facebook saying that law enforcement was not looking for his daughter .", + "length": 260 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6547710001468658 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:49.495997586Z", + "first_section_created": "2025-12-23T09:34:49.496355001Z", + "last_section_published": "2025-12-23T09:34:49.496666213Z", + "all_results_received": "2025-12-23T09:34:49.589719176Z", + "output_generated": "2025-12-23T09:34:49.589942885Z", + "total_processing_time_ms": 93, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 93, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:49.496355001Z", + "publish_time": "2025-12-23T09:34:49.496564809Z", + "first_worker_start": "2025-12-23T09:34:49.497165333Z", + "last_worker_end": "2025-12-23T09:34:49.570143Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:49.497213035Z", + "start_time": "2025-12-23T09:34:49.497282538Z", + "end_time": "2025-12-23T09:34:49.497387242Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:49.497534Z", + "start_time": "2025-12-23T09:34:49.497682Z", + "end_time": 
"2025-12-23T09:34:49.570143Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:49.497466245Z", + "start_time": "2025-12-23T09:34:49.497938265Z", + "end_time": "2025-12-23T09:34:49.498014368Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:49.49709443Z", + "start_time": "2025-12-23T09:34:49.497165333Z", + "end_time": "2025-12-23T09:34:49.497214635Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:49.49659391Z", + "publish_time": "2025-12-23T09:34:49.496666213Z", + "first_worker_start": "2025-12-23T09:34:49.497302139Z", + "last_worker_end": "2025-12-23T09:34:49.588805Z", + "total_journey_time_ms": 92, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:49.497242736Z", + "start_time": "2025-12-23T09:34:49.497302139Z", + "end_time": "2025-12-23T09:34:49.497379042Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:49.49766Z", + "start_time": "2025-12-23T09:34:49.49779Z", + "end_time": "2025-12-23T09:34:49.588805Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 91 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:49.497242936Z", + "start_time": "2025-12-23T09:34:49.49732574Z", + "end_time": "2025-12-23T09:34:49.497444145Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:49.497254137Z", + "start_time": "2025-12-23T09:34:49.49732554Z", + "end_time": "2025-12-23T09:34:49.497369342Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, 
+ "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 163, + "min_processing_ms": 72, + "max_processing_ms": 91, + "avg_processing_ms": 81, + "median_processing_ms": 91, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4888, + "slowest_section_id": 1, + "slowest_section_time_ms": 92 + } +} diff --git a/data/output/004d2253b243331ec330b4bc188f9289fc5730f6.json b/data/output/004d2253b243331ec330b4bc188f9289fc5730f6.json new file mode 100644 index 0000000..6147ad5 --- /dev/null +++ b/data/output/004d2253b243331ec330b4bc188f9289fc5730f6.json @@ -0,0 +1,436 @@ +{ + "file_name": "004d2253b243331ec330b4bc188f9289fc5730f6.txt", + "total_words": 1094, + "top_n_words": [ + { + "word": "the", + "count": 70 + }, + { + "word": "and", + "count": 37 + }, + { + "word": "to", + "count": 34 + }, + { + "word": "a", + "count": 29 + }, + { + "word": "s", + "count": 26 + }, + { + "word": "in", + "count": 24 + }, + { + "word": "of", + "count": 24 + }, + { + "word": "that", + "count": 24 + }, + { + "word": "mubarak", + "count": 22 + }, + { + "word": "cable", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + 
"text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "] .", + "length": 3 + }, + { + "text": "\" The U.", + "length": 8 + }, + { + "text": "For the U.", + "length": 10 + }, + { + "text": "(CNN) -- The U.", + "length": 15 + }, + { + "text": "It has complained about cuts in U.", + "length": 34 + }, + { + "text": "\" But given Mubarak's sensitivities, the U.", + "length": 43 + }, + { + "text": "\" In a later cable, Scobey suggested the new U.", + "length": 47 + }, + { + "text": "and makes any kind of violent change of leader unlikely.", + "length": 56 + }, + { + "text": "succeed his father remains deeply unpopular on the street.", + "length": 58 + }, + { + "text": "4 million strong, is at least twice the size it was under Sadat ...", + "length": 67 + }, + { + "text": "has trodden carefully in pressing the Egyptian government on human rights.", + "length": 74 + }, + { + "text": "\" In a later cable, she said that Mubarak \"harkens back to the Shah of Iran: the U.", + "length": 83 + }, + { + "text": "The cables show that Mubarak has taken a persistently hard line toward Iran, telling U.", + "length": 87 + }, + { + "text": "Even so, one cable concludes: \"The military still remains a potent political and economic force.", + "length": 96 + }, + { + "text": "\" He viewed himself as \"someone who is tough but fair, who ensures the basic needs of his people.", + "length": 97 + }, + { + "text": "[Egypt signed a peace treaty with Israel in 1979, and Mubarak has resisted popular opposition to it.", + "length": 100 + }, + { + "text": "And above all, Egypt is regarded as a moderate bulwark against Iranian-sponsored Islamist fundamentalism.", + "length": 105 + }, + { + "text": "A CNN analysis of secret and confidential 
cables published by WikiLeaks and its media partners reveals U.", + "length": 105 + }, + { + "text": "\" At the same time, the Mubarak government has been very sensitive to any perceived slight from Washington.", + "length": 107 + }, + { + "text": "\" There is no guarantee that any \"successor\" to the Mubarak government would take such a hard line with Hamas.", + "length": 110 + }, + { + "text": "\" That perspective is now being challenged -- and the role of the military may be critical in deciding the outcome.", + "length": 115 + }, + { + "text": "diplomatic cables, but is also underpinned by similar basic interests in a rough and unpredictable part of the world.", + "length": 117 + }, + { + "text": "encouraged him to accept reforms, only to watch the country fall into the hands of revolutionary religious extremists.", + "length": 118 + }, + { + "text": "\" To that end, the cables describe the Mubarak government as a helpful partner in stopping smuggling into Gaza from Egypt.", + "length": 122 + }, + { + "text": "relationship with President Hosni Mubarak's Egypt is full of contradictions and tensions, according to recently published U.", + "length": 124 + }, + { + "text": "\" It adds that \"unlike his father, (Gamal) cannot take the military's support for granted,\" having never served as an officer.", + "length": 126 + }, + { + "text": "But the same cable laments the lack of obvious contenders to succeed the aging Mubarak -- a situation that appears to hold today.", + "length": 129 + }, + { + "text": "Secretary of State, Hillary Clinton \"may wish to lay down a marker for a future discussion on democratization and human rights concerns.", + "length": 136 + }, + { + "text": "\" Scobey continued that Mubarak was \"a tried and true realist, innately cautious and conservative, and has little time for idealistic goals.", + "length": 140 + }, + { + "text": "\" Recent events may have eroded that confidence, but one cable in 2007 pointed out that Egypt's internal 
security apparatus, \"an estimated 1.", + "length": 141 + }, + { + "text": "economic aid and a stagnant level of military aid \"because it shows our diminished view of the value of our relationship\" according to one cable.", + "length": 145 + }, + { + "text": "A cable from 2009 said the United States now avoided \"the public confrontations that had become routine over the past several years\" over human rights.", + "length": 151 + }, + { + "text": "diplomats in 2008 that he had warned Tehran \"not to provoke the Americans\" on the nuclear issue and insisting Egypt could never accept a nuclear-armed Iran.", + "length": 156 + }, + { + "text": "\" The Egyptian president relied on his interior minister and intelligence service to \"keep the domestic beasts at bay, and Mubarak is not one to lose sleep over their tactics.", + "length": 175 + }, + { + "text": "\" A 2009 cable noted that with \"the discovery of a Hezbollah cell in Egypt, the Egyptians appear more willing to confront the Iranian surrogates and to work closely with Israel.", + "length": 177 + }, + { + "text": ", the alliance between Egypt and Saudi Arabia has also been an important counterweight to growing Iranian influence on the \"Arab street\" and among states such as Syria and Qatar.", + "length": 178 + }, + { + "text": "frustration with Mubarak's lack of succession planning, concerns over stuttering economic reform and private criticism of the Mubarak government's hard line toward domestic opponents.", + "length": 183 + }, + { + "text": "Scobey wrote in apparent frustration two years ago that Mubarak \"seems to be trusting to God and the ubiquitous military and civilian security services to ensure an orderly transition.", + "length": 184 + }, + { + "text": "On pressure to improve human rights, according to one cable from Scobey in 2009, \"Mubarak takes this issue personally, and it makes him seethe when we raise it, particularly in public.", + "length": 184 + }, + { + "text": "Over the past five years, 
the cables reveal a growing unease with the lack of a succession plan, and apprehension about the prospect of Mubarak's younger son, Gamal, taking over from his father.", + "length": 194 + }, + { + "text": "Ahead of Mubarak's visit to Washington in May 2009, Ambassador Margaret Scobey wrote from Cairo that \"the Egyptians want the visit to demonstrate that Egypt remains America's indispensable \"Arab ally.", + "length": 200 + }, + { + "text": "A cable from 2008 cites Egyptian experts as describing a \"disgruntled mid-level officer corps\" with military salaries falling far behind the civilian sector and the top brass averse to Gamal succeeding his father.", + "length": 213 + }, + { + "text": "Egyptian commentators also noted that many officers were frustrated that loyalty to the regime trumped competence, and that the best military talent was sidelined in case it should pose a threat to the government.", + "length": 213 + }, + { + "text": "Egyptian officials, from Mubarak down, have also repeatedly impressed upon visiting Americans -- military, diplomatic and Congressional -- that it alone among Arab states can play a mediating role between Israel and the Palestinians.", + "length": 233 + }, + { + "text": "\" After discussing whether the military might step in to prevent Mubarak from passing the baton to his son, the cable concludes: \"In a messier succession scenario, however, it becomes more difficult to predict the military's actions.", + "length": 233 + }, + { + "text": "cables display frustration with Mubarak's reluctance to address human rights issues, with one in 2008 saying: \"While Egypt has made some limited gains over the last several years, such as on freedom of the press, progress overall has been slow.", + "length": 244 + }, + { + "text": "Mubarak has also repeatedly warned of Iran's influence with Hamas in Gaza and Hezbollah in Lebanon, and in a cable from February last year, was quoted as describing \"Tehran's hand moving with ease throughout the 
region, from the Gulf to Morocco.", + "length": 245 + }, + { + "text": "But the cables also show that Washington sees Egypt as an important and -- until now -- stable ally on issues, including Iran's nuclear program, promoting negotiations between Israel and the Palestinian Authority and making life difficult for Hamas in Gaza.", + "length": 257 + }, + { + "text": "A cable from 2008 quoted a senior Egyptian military figure as stating that Egypt had spent approximately $40 million to purchase the steel for an underground wall on the Gaza border, \"and Egypt was paying the cost of this wall in terms of public opinion both within Egypt and the region.", + "length": 287 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7928171455860138 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:49.997763974Z", + "first_section_created": "2025-12-23T09:34:49.998068187Z", + "last_section_published": "2025-12-23T09:34:49.998449102Z", + "all_results_received": "2025-12-23T09:34:50.08372915Z", + "output_generated": "2025-12-23T09:34:50.083951459Z", + "total_processing_time_ms": 86, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 85, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:49.998068187Z", + "publish_time": "2025-12-23T09:34:49.998292996Z", + "first_worker_start": "2025-12-23T09:34:49.998671111Z", + "last_worker_end": "2025-12-23T09:34:50.082705Z", + "total_journey_time_ms": 84, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:49.998745314Z", + "start_time": "2025-12-23T09:34:49.998824717Z", + "end_time": "2025-12-23T09:34:49.998949722Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:49.998891Z", + "start_time": "2025-12-23T09:34:49.999028Z", + "end_time": 
"2025-12-23T09:34:50.082705Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 83 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:49.998661811Z", + "start_time": "2025-12-23T09:34:49.998741214Z", + "end_time": "2025-12-23T09:34:49.998852218Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:49.998599908Z", + "start_time": "2025-12-23T09:34:49.998671111Z", + "end_time": "2025-12-23T09:34:49.998710013Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:49.998338097Z", + "publish_time": "2025-12-23T09:34:49.998449102Z", + "first_worker_start": "2025-12-23T09:34:49.998840518Z", + "last_worker_end": "2025-12-23T09:34:50.075117Z", + "total_journey_time_ms": 76, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:49.998928921Z", + "start_time": "2025-12-23T09:34:49.999066027Z", + "end_time": "2025-12-23T09:34:49.999103128Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:49.999129Z", + "start_time": "2025-12-23T09:34:49.999301Z", + "end_time": "2025-12-23T09:34:50.075117Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 75 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:49.998915421Z", + "start_time": "2025-12-23T09:34:49.998966723Z", + "end_time": "2025-12-23T09:34:49.999006225Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:49.998805116Z", + "start_time": "2025-12-23T09:34:49.998840518Z", + "end_time": "2025-12-23T09:34:49.998856418Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + 
"total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 158, + "min_processing_ms": 75, + "max_processing_ms": 83, + "avg_processing_ms": 79, + "median_processing_ms": 83, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3317, + "slowest_section_id": 0, + "slowest_section_time_ms": 84 + } +} diff --git a/data/output/004d5b0e443ef65ff4bea3e86dbba8beee50b68f.json b/data/output/004d5b0e443ef65ff4bea3e86dbba8beee50b68f.json new file mode 100644 index 0000000..650775b --- /dev/null +++ b/data/output/004d5b0e443ef65ff4bea3e86dbba8beee50b68f.json @@ -0,0 +1,422 @@ +{ + "file_name": "004d5b0e443ef65ff4bea3e86dbba8beee50b68f.txt", + "total_words": 868, + "top_n_words": [ + { + "word": "the", + "count": 44 + }, + { + "word": "to", + "count": 27 + }, + { + "word": "her", + "count": 25 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "amish", + "count": 16 + }, + { + "word": "she", + "count": 15 + }, + { + "word": "i", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "| .", + "length": 3 
+ }, + { + "text": "'I .", + "length": 4 + }, + { + "text": "'I .", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "'Now .", + "length": 6 + }, + { + "text": "Blogs .", + "length": 7 + }, + { + "text": "Online .", + "length": 8 + }, + { + "text": "com to .", + "length": 8 + }, + { + "text": "'After .", + "length": 8 + }, + { + "text": "' With .", + "length": 8 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "-based agency.", + "length": 14 + }, + { + "text": "Sadie Whitelocks .", + "length": 18 + }, + { + "text": "' Scroll down for video .", + "length": 25 + }, + { + "text": "12:46 EST, 19 March 2014 .", + "length": 26 + }, + { + "text": "09:20 EST, 18 March 2014 .", + "length": 26 + }, + { + "text": "up her Amish life to move to the city.", + "length": 38 + }, + { + "text": "There were no electronics in the house.", + "length": 39 + }, + { + "text": "burn it and tell me that I'm going to hell.", + "length": 43 + }, + { + "text": "they represent 'vanity' which they deem a sin.", + "length": 46 + }, + { + "text": "but then again he also knows that he can't control my life.", + "length": 59 + }, + { + "text": "records show that Kate posted a photograph on exploretalent.", + "length": 60 + }, + { + "text": "be the first person in my family to ever have a college education.", + "length": 66 + }, + { + "text": "spend about six hours at a time just out in the fields raking hay.", + "length": 66 + }, + { + "text": "'I couldn't ask for better parents or better brothers and sisters.", + "length": 66 + }, + { + "text": "thing that matters is who you are and whether or not you're happy.", + "length": 66 + }, + { + "text": "She said: 'I actually have really big hopes and plans for the future.", + "length": 69 + }, + { + "text": "school I'd take care of the calves, I make dinner for the family and .", + "length": 70 + }, + { + "text": "father face to face a lingerie picture that I 
took he would probably .", + "length": 70 + }, + { + "text": "Now the 5ft 8in beauty sets pulses racing with saucy underwear shoots.", + "length": 70 + }, + { + "text": "that I'm outside of that I realise the world is so large and the only .", + "length": 71 + }, + { + "text": "mean my dad's a minister in the church, he of course does not approve .", + "length": 71 + }, + { + "text": "which is at odds with the Amish stance against photographs, believing .", + "length": 71 + }, + { + "text": "want to be on a billboard in the middle of Times Square and I want to .", + "length": 71 + }, + { + "text": "enter a Best Smile contest in 2010 - years before she claimed she gave .", + "length": 72 + }, + { + "text": "After appearing on the small screen, Miss Stoltz's life changed overnight.", + "length": 74 + }, + { + "text": "also revealed she had previously submitted images to modeling agencies - .", + "length": 74 + }, + { + "text": "no desire to return to the Amish way of life Miss Stoltz is making plans for .", + "length": 78 + }, + { + "text": "Despite the radical change in her lifestyle, Miss Stoltz is still very close to her family.", + "length": 91 + }, + { + "text": "her future and will appear in a second TLC spinoff series called Return to Amish this June.", + "length": 91 + }, + { + "text": "' And the bishop's daughter had to deal with the disapproval of the community she left behind.", + "length": 94 + }, + { + "text": "' Lingerie modeling wasn't the first time Miss Stoltz had broken the rules of her strict faith.", + "length": 95 + }, + { + "text": "During Breaking Amish, she was forced to admit she had been arrested for a DUI before the show.", + "length": 95 + }, + { + "text": "Leaving the past behind: Miss Stoltz says she has no desire to return to the Amish way of life .", + "length": 96 + }, + { + "text": "She said: 'Even though they don't approve of my lifestyle they're always telling me to come back home.", + "length": 102 + }, + { + "text": "Looking back 
on her old life, the pretty brunette said: 'Before school I would go out and feed the cows.", + "length": 104 + }, + { + "text": "Career ambitions: The 5ft 8in brunette says her dream is to appear on a giant billboard in Times Square .", + "length": 105 + }, + { + "text": "Racy lady: The New York-based 23-year-old landed her first major modeling job with men's magazine Maxim .", + "length": 105 + }, + { + "text": "Like the other women in her family, she would wear conservative ankle-length dresses, a smock and bonnet.", + "length": 105 + }, + { + "text": "Mugshot: During Breaking Amish, she was forced to admit she had been arrested for a DUI before the show .", + "length": 105 + }, + { + "text": "Changed: Miss Stoltz, a bishop's daughter, is pictured in 2012 as she appeared on TLC's show, Breaking Amish .", + "length": 110 + }, + { + "text": "Her good looks caught the attention of Major Model Management and she was immediately signed up by the European and U.", + "length": 118 + }, + { + "text": "The series followed a group of Amish and Mennonite youngsters as they had their first taste of modern life in New York.", + "length": 119 + }, + { + "text": "On top of school work she spent hours in the fields pitching hay as well as cooking, cleaning and looking after her family.", + "length": 123 + }, + { + "text": "'The Amish think that modelling is one of the worst things a woman can do, they see it as flaunting your body and being vain.", + "length": 125 + }, + { + "text": "' But in 2012 the Miss Stoltz turned her back on her ultra-strict upbringing to appear in TLC's reality TV show Breaking Amish.", + "length": 127 + }, + { + "text": "Making headlines: Miss Stotlz also appeared in the New York Post, when the newspaper covered the 100th anniversary of the brassiere .", + "length": 133 + }, + { + "text": "The New York-based 23-year-old, who landed her first major job with men's magazine Maxim last year, said: 'If I were to even show my .", + "length": 134 + }, + { + 
"text": "Breaking Amish star Kate Stotlz has opened up about how her strict religious family vehemently disapprove of her lingerie modeling career.", + "length": 138 + }, + { + "text": "Growing up Miss Stoltz, born Kate Stoltzfus, was even forbidden from having her picture taken fully-dressed because of traditional Amish beliefs.", + "length": 145 + }, + { + "text": "Stripping off her inhibitions: Growing up Miss Stoltz was even forbidden from having her picture taken fully-dressed because of traditional Amish beliefs .", + "length": 155 + }, + { + "text": "Starting a new chapter of her life: In 2012 the Miss Stoltz turned her back on her ultra-strict upbringing to appear in the TLC reality TV show Breaking Amish .", + "length": 160 + }, + { + "text": "Not for mom and dad to see: Breaking Amish star Kate Stotlz has opened up about how her strict religious family vehemently disapprove of her lingerie modeling career .", + "length": 167 + }, + { + "text": "Talking about her move from rural Pennsylvania to Manhattan, Miss Stoltz recalled: 'I actually found it really hard to get used to New York because it's so loud and there's so many people.", + "length": 188 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.46513110399246216 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:50.499176748Z", + "first_section_created": "2025-12-23T09:34:50.50096072Z", + "last_section_published": "2025-12-23T09:34:50.501165729Z", + "all_results_received": "2025-12-23T09:34:50.572757923Z", + "output_generated": "2025-12-23T09:34:50.572952231Z", + "total_processing_time_ms": 73, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 71, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:50.50096072Z", + "publish_time": "2025-12-23T09:34:50.501165729Z", + "first_worker_start": "2025-12-23T09:34:50.50169425Z", + "last_worker_end": 
"2025-12-23T09:34:50.571944Z", + "total_journey_time_ms": 70, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:50.501662949Z", + "start_time": "2025-12-23T09:34:50.501739752Z", + "end_time": "2025-12-23T09:34:50.501835156Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:50.5019Z", + "start_time": "2025-12-23T09:34:50.502043Z", + "end_time": "2025-12-23T09:34:50.571944Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 69 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:50.501645248Z", + "start_time": "2025-12-23T09:34:50.50169685Z", + "end_time": "2025-12-23T09:34:50.501860657Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:50.501622747Z", + "start_time": "2025-12-23T09:34:50.50169425Z", + "end_time": "2025-12-23T09:34:50.501731651Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 69, + "min_processing_ms": 69, + "max_processing_ms": 69, + "avg_processing_ms": 69, + "median_processing_ms": 69, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4791, + "slowest_section_id": 0, + "slowest_section_time_ms": 70 + } +} diff --git a/data/output/004d6b65c6de14b27a46cf21d7b727cdfd25f638.json b/data/output/004d6b65c6de14b27a46cf21d7b727cdfd25f638.json new file mode 100644 index 0000000..4c0c048 --- /dev/null +++ b/data/output/004d6b65c6de14b27a46cf21d7b727cdfd25f638.json @@ -0,0 +1,560 @@ +{ + "file_name": "004d6b65c6de14b27a46cf21d7b727cdfd25f638.txt", + "total_words": 1323, + "top_n_words": [ + { + "word": "the", + "count": 86 + }, + { + "word": "of", + "count": 40 + }, + { + "word": "to", + "count": 36 + }, + { + "word": "a", + "count": 26 + }, + { + "word": "in", + "count": 25 + }, + { + "word": "and", + "count": 22 + }, + { + "word": "is", + "count": 21 + }, + { + "word": "water", + "count": 20 + }, + { + "word": "s", + "count": 18 + }, + { + "word": "kali", + "count": 17 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "The U.", + "length": 6 + }, + { + "text": "the heat.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "They obliged.", + "length": 13 + }, + { + "text": "highest when the .", + "length": 18 + }, + { + "text": "The CDC says there .", + "length": 20 + }, + { + "text": "summer heat each year.", + "length": 22 + }, + { + "text": "S in the past 50 years.", + "length": 23 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "in the last five decades .", + "length": 26 + }, + { + "text": "09:20 EST, 14 August 2013 
.", + "length": 27 + }, + { + "text": "13:58 EST, 14 August 2013 .", + "length": 27 + }, + { + "text": "of Kali when she gets home.", + "length": 27 + }, + { + "text": "'You're a mom, you're a dad.", + "length": 28 + }, + { + "text": "We don't want to scare people.", + "length": 30 + }, + { + "text": "Ms Hardig, who is battling breast .", + "length": 35 + }, + { + "text": "in the past fifty years, a month ago.", + "length": 37 + }, + { + "text": "Michael Zennie and Associated Press .", + "length": 37 + }, + { + "text": "You can still take your child swimming.", + "length": 39 + }, + { + "text": "'It's kind of a battle to decide that .", + "length": 39 + }, + { + "text": "One other survivor was found in Mexico .", + "length": 40 + }, + { + "text": "So they can go and still be a kid,' she said.", + "length": 45 + }, + { + "text": "But, they only become active when water heats up.", + "length": 49 + }, + { + "text": "Mark Heulitt, one of the doctors who treated her.", + "length": 49 + }, + { + "text": "Only one person has survived in the United States.", + "length": 50 + }, + { + "text": "have been around 130 cases of the disease in the U.", + "length": 51 + }, + { + "text": "The amoeba needs very specific conditions to thrive.", + "length": 52 + }, + { + "text": "They are obviously ecstatic at the change of outlook.", + "length": 53 + }, + { + "text": "coming home and I'm going to get to take care of her,' she said.", + "length": 64 + }, + { + "text": "'She wound up being on a ventilator for over two weeks,' said Dr.", + "length": 65 + }, + { + "text": "Primary amoebic meningoencephalitis is as rare as it is horrific.", + "length": 65 + }, + { + "text": "'We're praying for him to be survivor number four,' Ms Hardig said.", + "length": 67 + }, + { + "text": "Ms Hardig is also hoping to raise awareness of another case of PAM.", + "length": 67 + }, + { + "text": "It's 99 per cent fatal with only two reported instances of survival.", + "length": 68 + }, + { + "text": "- 
making the disease extremely rare, when compared to the number of .", + "length": 69 + }, + { + "text": "Health officials say swimmers can reduce their risks even further by: .", + "length": 71 + }, + { + "text": "lakes and muddy swimming holes Americans dip into for refuge from the .", + "length": 71 + }, + { + "text": "temperature is hottest - and swimmers are looking for a reprieve from .", + "length": 71 + }, + { + "text": "cancer, is now making sure to focus on her health so she can take care .", + "length": 72 + }, + { + "text": "Kali, but I know I have to try to get myself better too, because Kali's .", + "length": 73 + }, + { + "text": "Naegleria are found in nearly every freshwater body of water - even lakes.", + "length": 74 + }, + { + "text": "Three days later, Kali was moved out of the pediatric intensive care unit.", + "length": 74 + }, + { + "text": "you want to put your health on hold because you want to totally focus on .", + "length": 74 + }, + { + "text": "We just want you to know there are little things you can do to help them out.", + "length": 77 + }, + { + "text": "Zachary Reyna is being treated for the infection at Miami Children's Hospital.", + "length": 78 + }, + { + "text": "Eventually the infection destroys brain tissue which causes fatal brain swelling.", + "length": 81 + }, + { + "text": "has recorded just 130 cases of Naegleria fowleri infections in the last 50 years .", + "length": 82 + }, + { + "text": "But the treatment seems to have worked, and she was taken off the ventilator on August 6.", + "length": 89 + }, + { + "text": "The amoebas are most active when the water temperature reaches about 85 degrees, Barham said.", + "length": 93 + }, + { + "text": "That's remarkable progress for a girl who was barely able to squeeze her mother's hand last week.", + "length": 97 + }, + { + "text": "However, this is the second time the parasite has infected a swimmer at Willow Springs Water Park.", + "length": 98 + }, + { + "text": "Health 
officials have recorded only 125 instances of the disease in the United States in the last 50 years.", + "length": 107 + }, + { + "text": "The water must reach a relatively high temperature for the parasites to reach numbers that threaten humans.", + "length": 107 + }, + { + "text": "On Monday her family posted a picture on a Facebook support page of Kali writing her name for the first time.", + "length": 109 + }, + { + "text": "Stronger: Kali was taken off a ventilator and transferred out of the pediatric intensive care unit last week .", + "length": 110 + }, + { + "text": "It grows in the sediment at the bottom of pools of warm, stagnant water and is most active at about 85 degrees.", + "length": 111 + }, + { + "text": "Kali's mother said she had no idea about the risks of swimming in warm lakes and had never heard of the disease.", + "length": 112 + }, + { + "text": "In August 2010, 7-year-old Davian Briggs died after contracting the disease after he had been swimming at the lake.", + "length": 115 + }, + { + "text": "Making strides: Doctors believe early detection and experimental treatments are helping Kali get over her infection .", + "length": 117 + }, + { + "text": "To combat the the infection, doctors cooled Kali's body to reduce the swelling and even put her on breast-cancer drugs.", + "length": 119 + }, + { + "text": "It is caused by an amoeba that implants itself in its victim's brains - usually when infected water shoots of their noses.", + "length": 122 + }, + { + "text": "He contracted the amoeba while knee boarding in a ditch near his house with friends in LaBelle, Florida earlier this month.", + "length": 123 + }, + { + "text": "But doctors now believe Kali will make it out of the woods, due in large part to early detection and experimental treatment.", + "length": 124 + }, + { + "text": "Kali Hardig contracted parasitic meningitis, a very rare form of meningitis that only 130 people have been diagnosed with in the U.", + "length": 131 + }, + { + 
"text": "The rare form of meningitis is caused by the Naegleria fowleri amoeba entering the body through the nose and traveling into the brain.", + "length": 134 + }, + { + "text": "Good signs: Kali wrote her name for the first time Monday, left, after only barely being able to squeeze her mother's hand a week ago .", + "length": 135 + }, + { + "text": "Because two cases have been seen from the same body was water, state health officials asked the park owners to close their swimming hole.", + "length": 137 + }, + { + "text": "Still, Barham, of the health department, says that swimmers really shouldn't be too concerned about the danger of swimming in fresh water.", + "length": 138 + }, + { + "text": "Doctors diagnosed her with primary amebic meningoencephalitis (PAM), a rare form of meningitis which has only produced 130 cases in the U.", + "length": 138 + }, + { + "text": "Getting better: Doctors believe 12-year-old Kali Hardig will survive her rare form of meningitis she got from an amoeba at an Arkansas water park .", + "length": 147 + }, + { + "text": "Doctors say the 12-year-old girl who contracted a deadly amoeba while swimming at an Arkansas water park will be the third to survive the brain-eating parasite.", + "length": 160 + }, + { + "text": "Kali was brought into the Arkansas Children's Hospital on July 19 with a fever, not long after she went swimming at the Willow Springs Water Park in Little Rock.", + "length": 161 + }, + { + "text": "The condition is caused when the Naegleria parasite enters a patient's brain, usually after accidentally inhaling stagnant, warm water up the nose while swimming.", + "length": 162 + }, + { + "text": "Another case: 12-year-old Zachary Reyna of LaBelle, Florida is also fighting the infection after contracting the amoeba knee boarding in a ditch earlier this month .", + "length": 165 + }, + { + "text": "Another tragedy: Davian Briggs, 7, died of the disease in 2010 after swimming at the Willow Springs Water Park - where 
little Kali is believed to have been infected .", + "length": 166 + }, + { + "text": "Mother's battle: Traci Hardig, Kali's mother, is battling breast cancer and is focusing on being healthy to take care of her daughter when she is released from the hospital .", + "length": 174 + }, + { + "text": "She encourages parents to make their children wear nose plugs while swimming - to minimize the risk of inhaling water through the nose and contracting the Naegleria parasite.", + "length": 174 + }, + { + "text": "Closed: The owners of Willow Springs Water Park near Little Rock, Arkansas, have shut down the lake after learning of two reported cases of the deadly disease in three years .", + "length": 175 + }, + { + "text": "'Though the odds of contracting Naegleria are extremely low, they are just not good enough to allow our friends or family to swim,' owners David and Lou Ann Ratliff said in a statement.", + "length": 185 + }, + { + "text": "Symptoms appear quickly, about one to seven days after inception, and include headache, fever, nausea, vomiting and get worse causing stiff neck, confusion, loss of balance, seizures and hallucinations.", + "length": 202 + }, + { + "text": "'We've went from being told that our little girl wouldn't survive this amoeba to now they're saying that Kali is going to be the third survivor and going to get to go home,' said her mother Traci Hardig.", + "length": 203 + }, + { + "text": "Arkansas Department of Health spokesman Ed Barham told MailOnline the water must be forcefully, almost violently, pushed up the nose - as it can be after diving into the water, being dunked or using a water slide.", + "length": 213 + }, + { + "text": "The park, which attracts up to 250 swimmers a day - and thousands over the course of a summer - is now looking into installing a hard pool bottom at the swimming hole and filling it with chlorinated water supplied by Little Rock.", + "length": 229 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 
0.6812908053398132 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:51.002166431Z", + "first_section_created": "2025-12-23T09:34:51.002597149Z", + "last_section_published": "2025-12-23T09:34:51.002922362Z", + "all_results_received": "2025-12-23T09:34:51.104887663Z", + "output_generated": "2025-12-23T09:34:51.105548789Z", + "total_processing_time_ms": 103, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 101, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:51.002597149Z", + "publish_time": "2025-12-23T09:34:51.002807757Z", + "first_worker_start": "2025-12-23T09:34:51.003319778Z", + "last_worker_end": "2025-12-23T09:34:51.10014Z", + "total_journey_time_ms": 97, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:51.003447183Z", + "start_time": "2025-12-23T09:34:51.003526986Z", + "end_time": "2025-12-23T09:34:51.003666692Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:51.003617Z", + "start_time": "2025-12-23T09:34:51.003738Z", + "end_time": "2025-12-23T09:34:51.10014Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 96 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:51.003336178Z", + "start_time": "2025-12-23T09:34:51.003409281Z", + "end_time": "2025-12-23T09:34:51.003523286Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:51.003254475Z", + "start_time": "2025-12-23T09:34:51.003319778Z", + "end_time": "2025-12-23T09:34:51.00336808Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:51.002836558Z", + "publish_time": "2025-12-23T09:34:51.002922362Z", + "first_worker_start": 
"2025-12-23T09:34:51.003395381Z", + "last_worker_end": "2025-12-23T09:34:51.104205Z", + "total_journey_time_ms": 101, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:51.003447183Z", + "start_time": "2025-12-23T09:34:51.003553787Z", + "end_time": "2025-12-23T09:34:51.00363509Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:51.003592Z", + "start_time": "2025-12-23T09:34:51.00375Z", + "end_time": "2025-12-23T09:34:51.104205Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 100 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:51.003425882Z", + "start_time": "2025-12-23T09:34:51.003465184Z", + "end_time": "2025-12-23T09:34:51.003530286Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:51.003361679Z", + "start_time": "2025-12-23T09:34:51.003395381Z", + "end_time": "2025-12-23T09:34:51.003426182Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 196, + "min_processing_ms": 96, + "max_processing_ms": 100, + "avg_processing_ms": 98, + "median_processing_ms": 100, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + 
"worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3746, + "slowest_section_id": 1, + "slowest_section_time_ms": 101 + } +} diff --git a/data/output/004da1ee17241ad2505624efa3de81886c12dc3f.json b/data/output/004da1ee17241ad2505624efa3de81886c12dc3f.json new file mode 100644 index 0000000..0edb5e1 --- /dev/null +++ b/data/output/004da1ee17241ad2505624efa3de81886c12dc3f.json @@ -0,0 +1,396 @@ +{ + "file_name": "004da1ee17241ad2505624efa3de81886c12dc3f.txt", + "total_words": 908, + "top_n_words": [ + { + "word": "the", + "count": 44 + }, + { + "word": "and", + "count": 31 + }, + { + "word": "of", + "count": 28 + }, + { + "word": "to", + "count": 27 + }, + { + "word": "in", + "count": 23 + }, + { + "word": "she", + "count": 17 + }, + { + "word": "s", + "count": 16 + }, + { + "word": "her", + "count": 15 + }, + { + "word": "was", + "count": 13 + }, + { + "word": "kercher", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Sam Webb .", + "length": 10 + }, + { + "text": "No one deserves that.", + "length": 21 + }, + { + "text": "She wanted to be here.", + "length": 22 + }, + { + "text": "She didn't deserve that.", + "length": 24 + }, + { + "text": "Everything she went through.", + "length": 28 + }, + { + "text": "Her throat had been slashed.", + "length": 28 + }, + { + "text": "She really fought to be here.", + "length": 29 + }, + { + "text": "It was clearly an unnatural death.", + "length": 34 + }, + { + "text": "Amanda Knox, pictured cycling in Seattle.", + "length": 41 + }, + { + "text": "One male was convicted and did not appeal.", + "length": 42 + }, + { + "text": "It's another chapter moving things forward.", + "length": 43 + }, + { + "text": "'Three individuals were 
arrested and tried.", + "length": 43 + }, + { + "text": "The fear and the terror and not knowing why.", + "length": 44 + }, + { + "text": "'We all definitely want some sort of closure.", + "length": 45 + }, + { + "text": "'Seeing the city of Perugia and making new friends.", + "length": 51 + }, + { + "text": "'Everything that Meredith must have felt that night.", + "length": 52 + }, + { + "text": "I'll miss you but you'll go and have fun\",' she said.", + "length": 53 + }, + { + "text": "The pair were originally found guilty of murder in 2009.", + "length": 56 + }, + { + "text": "Rudy Guede (left) was sentenced to 30 years in prison in 2008.", + "length": 62 + }, + { + "text": "Loss: Meredith Kercher, who was murdered in 2007 in Perugia, Italy.", + "length": 67 + }, + { + "text": "' None of Miss Kercher's relatives were present for the five-minute hearing.", + "length": 76 + }, + { + "text": "Knox was sentenced to 28 years and six months while Sollecito was jailed for 25 years.", + "length": 86 + }, + { + "text": "I do conclude that she was unlawfully killed,' said Dr Palmer, senior coroner for the south London area.", + "length": 104 + }, + { + "text": "top of that to have all the media attention that has gone for so long makes it very difficult to cope with.", + "length": 107 + }, + { + "text": "Knox has refused to go back from America for the hearing saying they would have to drag her 'kicking and screaming.", + "length": 115 + }, + { + "text": "She recently had her guilty verdict reinstated at Italy's highest court for the murder of the Leeds University student .", + "length": 120 + }, + { + "text": "' Last month it emerged 27-year-old Guede, an Ivorian, has been allowed out on day release after just six years to study.", + "length": 121 + }, + { + "text": "'On the night of the first and second of November 2007 Meredith was found in her bedroom at a residence in Perugia, Italy.", + "length": 122 + }, + { + "text": "Rudy Guede, a drug dealer, is serving a 
16-year sentence over the death, though the courts have said he did not act alone.", + "length": 122 + }, + { + "text": "'She died, the autopsy tells us, as a result of haemorraghic shock from stab and incised wounds to the vasculature of the neck.", + "length": 127 + }, + { + "text": "They were cleared nearly two years later - when Knox returned to the US - but the appeal court ordered a fresh trial last March.", + "length": 128 + }, + { + "text": "Earlier this year Miss Kercher's sister Stephanie said the victim 'had been forgotten' in the media storm surrounding Amanda Knox.", + "length": 130 + }, + { + "text": "She added her family had struggled to keep their memories of her alive when press coverage of her murder focused on another person.", + "length": 131 + }, + { + "text": "'The other two were convicted and appealed and the present position I believe is that there are further proceedings currently in Italy.", + "length": 135 + }, + { + "text": "An inquest had to be opened in this country because Meredith, known to family and friends as Mez, suffered a 'violent and unnatural death.", + "length": 138 + }, + { + "text": "Ms Kercher's brother Lyle also spoke of struggling to cope with his sister's death and said of the new guilty verdicts: 'It's not the end of it.", + "length": 144 + }, + { + "text": "Amanda Knox attends her appeal hearing to reconsider her guilty verdict in the murder of Meredith Kercher at Perugia's Court of Appeal in 2011 .", + "length": 144 + }, + { + "text": "Just having an end of the Italian justice system and knowing that's the final decision, and then we can start to remember just Meredith,' she said.", + "length": 147 + }, + { + "text": "Amanda Knox's former lover Raffaele Sollecito is appealing the re-convictions, with the case due to be heard by Italy's Supreme Court by early 2015 .", + "length": 149 + }, + { + "text": "Italy's highest Court of Cessation ruled in March 2013 to overturn the acquittal of Knox and Raffaele Sollecito for 
the 2007 murder of Meredith Kercher .", + "length": 153 + }, + { + "text": "'The family I think are keen for closure and after all these years later I can bring some closure by completing the inquest and inquisition,' the coroner added.", + "length": 160 + }, + { + "text": "' In January Amanda Knox, 26, and Raffaele Sollecito, 29, had their guilty verdicts reinstated at Italy's highest court for the murder of Leeds University student Miss Kercher.", + "length": 176 + }, + { + "text": "A coroner has concluded she died 'unnaturally' Exchange student Meredith Kercher was unlawfully killed, a coroner has concluded, nearly seven years on from her murder in Italy.", + "length": 176 + }, + { + "text": "Coroner's Office manager Barry May said he had liaised with Miss Kercher's brother, Lyle, who had spoken to family members, and they were 'content' for the proceedings to take place in their absence.", + "length": 199 + }, + { + "text": "In a brief hearing at Croydon Coroner's Court in south London, coroner Dr Roy Palmer said Miss Kercher met her death 'unlawfully' at her home in Perugia, Italy, over the night of November 1 and 2 in 2007.", + "length": 204 + }, + { + "text": "Speaking of when she last saw her sister alive as she prepared to study in Italy as part of her Leeds University course, Ms Kercher said: 'She was very excited about coming to Italy, looking forward to learning about Italian culture.", + "length": 233 + }, + { + "text": "Prosecutors claimed that Miss Kercher, of Coulsdon, Surrey, was the victim of a drug-fuelled sex game gone wrong but the defendants have consistently protested their innocence and claim they were not in the apartment the night she died.", + "length": 236 + }, + { + "text": "'We were just talking on the sofa and having a little cuddle of goodbye and I just remember her suddenly crying and saying that she was going to be sad to go but she was excited to come and I remember being quite taken aback and I thought, \"Don't make me sad.", + 
"length": 259 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.81914883852005 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:51.503710204Z", + "first_section_created": "2025-12-23T09:34:51.504215724Z", + "last_section_published": "2025-12-23T09:34:51.504631041Z", + "all_results_received": "2025-12-23T09:34:51.588059797Z", + "output_generated": "2025-12-23T09:34:51.588282806Z", + "total_processing_time_ms": 84, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 83, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:51.504215724Z", + "publish_time": "2025-12-23T09:34:51.504484635Z", + "first_worker_start": "2025-12-23T09:34:51.505087959Z", + "last_worker_end": "2025-12-23T09:34:51.587166Z", + "total_journey_time_ms": 82, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:51.505131661Z", + "start_time": "2025-12-23T09:34:51.505196264Z", + "end_time": "2025-12-23T09:34:51.505292568Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:51.505252Z", + "start_time": "2025-12-23T09:34:51.505463Z", + "end_time": "2025-12-23T09:34:51.587166Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 81 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:51.505027357Z", + "start_time": "2025-12-23T09:34:51.50510516Z", + "end_time": "2025-12-23T09:34:51.505213664Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:51.505027257Z", + "start_time": "2025-12-23T09:34:51.505087959Z", + "end_time": "2025-12-23T09:34:51.505139461Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:51.504548038Z", + 
"publish_time": "2025-12-23T09:34:51.504631041Z", + "first_worker_start": "2025-12-23T09:34:51.505081659Z", + "last_worker_end": "2025-12-23T09:34:51.539412Z", + "total_journey_time_ms": 34, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:51.505125661Z", + "start_time": "2025-12-23T09:34:51.505173263Z", + "end_time": "2025-12-23T09:34:51.505179863Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:51.505383Z", + "start_time": "2025-12-23T09:34:51.505532Z", + "end_time": "2025-12-23T09:34:51.539412Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 33 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:51.505077059Z", + "start_time": "2025-12-23T09:34:51.505120361Z", + "end_time": "2025-12-23T09:34:51.505128061Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:51.505040758Z", + "start_time": "2025-12-23T09:34:51.505081659Z", + "end_time": "2025-12-23T09:34:51.505084159Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 114, + "min_processing_ms": 33, + "max_processing_ms": 81, + "avg_processing_ms": 57, + "median_processing_ms": 81, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2553, + "slowest_section_id": 0, + "slowest_section_time_ms": 82 + } +} diff --git a/data/output/004dceb73797ba531cb200afeb14ea5961d01160.json b/data/output/004dceb73797ba531cb200afeb14ea5961d01160.json new file mode 100644 index 0000000..7f6d6e2 --- /dev/null +++ b/data/output/004dceb73797ba531cb200afeb14ea5961d01160.json @@ -0,0 +1,242 @@ +{ + "file_name": "004dceb73797ba531cb200afeb14ea5961d01160.txt", + "total_words": 357, + "top_n_words": [ + { + "word": "the", + "count": 16 + }, + { + "word": "s", + "count": 11 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "he", + "count": 7 + }, + { + "word": "wenger", + "count": 7 + }, + { + "word": "chelsea", + "count": 6 + }, + { + "word": "for", + "count": 6 + }, + { + "word": "that", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "Or Oscar.", + "length": 9 + }, + { + "text": "Or Willian.", + "length": 11 + }, + { + "text": "On Sunday they are the favourites.", + "length": 34 + }, + { + "text": "’ Like our Arsenal Facebook page.", + "length": 35 + }, + { + "text": "He said: ‘They are one of the favourites.", + "length": 43 + }, + { + "text": "‘If you mark Costa, Eden Hazard could score.", + "length": 46 + }, + { + "text": "Jose Mourinho is unbeaten in meetings with Wenger as a manager .", + "length": 64 + }, + { + "text": "But every team had its difficulties at some stage in the season.", + "length": 64 + }, + { + "text": "They have had the perfect start and have the best numbers until now.", + "length": 68 + }, + { + "text": "Diego Costa in training for 
Chelsea ahead of the clash with Arsenal on Sunday .", + "length": 79 + }, + { + "text": "Arsenal boss Arsene Wenger has questioned Costa's injury problems ahead of the showdown .", + "length": 89 + }, + { + "text": "’ The 25-year-old has been in supreme form for Chelsea scoring eight in six Premier League games so far .", + "length": 107 + }, + { + "text": "’ With a ratio of eight Premier League goals in six games, it’s certainly possible to share that conclusion.", + "length": 112 + }, + { + "text": "‘It is always difficult to find balance and adapt to opponents strengths without restricting your own expression.", + "length": 115 + }, + { + "text": "‘We have to adapt a little bit but let’s be honest, any single player at Chelsea can make a difference,’ Wenger said.", + "length": 124 + }, + { + "text": "Despite Chelsea’s fine start, and Wenger’s woeful record against Mourinho, the Frenchman insisted he would not alter his tactics for the match.", + "length": 147 + }, + { + "text": "It was put to Wenger that Jose Mourinho might be applying a smokescreen in his regular comments about the allegedly fragile hamstrings of his striker.", + "length": 150 + }, + { + "text": "Arsene Wenger expects Chelsea to take a tumble at some stage but he’s far less convinced that Diego Costa’s much-discussed hamstrings will creak under the strain.", + "length": 166 + }, + { + "text": "Arsenal’s manager smiled at the suggestions before today’s clash at Stamford Bridge, saying: ‘Honestly, when you see him play it is not obvious (that he has an injury).", + "length": 174 + }, + { + "text": "‘He has had a problem with his hamstring for a long time and we saw that in the Champions League final but when I have seen him recently, it is difficult to guess that he has a hamstring problem.", + "length": 197 + }, + { + "text": "The 25-year-old’s impact appears to have made up for Chelsea’s striking shortcomings from last season, with Wenger admitting they are favourites for the title though he 
raised questions over their staying power.", + "length": 215 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6037726402282715 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:52.004231906Z", + "first_section_created": "2025-12-23T09:34:52.004620621Z", + "last_section_published": "2025-12-23T09:34:52.004806129Z", + "all_results_received": "2025-12-23T09:34:52.06997275Z", + "output_generated": "2025-12-23T09:34:52.070129856Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:52.004620621Z", + "publish_time": "2025-12-23T09:34:52.004806129Z", + "first_worker_start": "2025-12-23T09:34:52.005285448Z", + "last_worker_end": "2025-12-23T09:34:52.069085Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:52.005275448Z", + "start_time": "2025-12-23T09:34:52.00534155Z", + "end_time": "2025-12-23T09:34:52.005379552Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:52.005484Z", + "start_time": "2025-12-23T09:34:52.005633Z", + "end_time": "2025-12-23T09:34:52.069085Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:52.005280048Z", + "start_time": "2025-12-23T09:34:52.005387052Z", + "end_time": "2025-12-23T09:34:52.005495457Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:52.005231346Z", + "start_time": "2025-12-23T09:34:52.005285448Z", + "end_time": "2025-12-23T09:34:52.00532955Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + 
"worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2066, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/004e53936bb473d174ad8bcdc8bcf4422f32ec8f.json b/data/output/004e53936bb473d174ad8bcdc8bcf4422f32ec8f.json new file mode 100644 index 0000000..dcbaf8e --- /dev/null +++ b/data/output/004e53936bb473d174ad8bcdc8bcf4422f32ec8f.json @@ -0,0 +1,254 @@ +{ + "file_name": "004e53936bb473d174ad8bcdc8bcf4422f32ec8f.txt", + "total_words": 473, + "top_n_words": [ + { + "word": "the", + "count": 39 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "in", + "count": 12 + }, + { + "word": "zhang", + "count": 12 + }, + { + "word": "dr", + "count": 11 + }, + { + "word": "is", + "count": 10 + }, + { + "word": "it", + "count": 10 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "cloak", + "count": 9 + }, + { + "word": "s", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { 
+ "text": "Now you see it...", + "length": 17 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Dr Zhang is not the only innovator at TED2013.", + "length": 46 + }, + { + "text": "Whenever the note is behind the cloak, it cannot be seen.", + "length": 57 + }, + { + "text": "The demonstration in the video is not the same as the one presented at TED2013.", + "length": 79 + }, + { + "text": "Last year, Dr Zhang was listed in the MIT Technology Review's '35 Innovators Under 35' list.", + "length": 92 + }, + { + "text": ": The box was made by attaching two pieces of calcite - a common mineral that can bend light .", + "length": 94 + }, + { + "text": "He told BoingBoing that the came up with the idea in 2010, and constructed the device 'just for fun.", + "length": 100 + }, + { + "text": "' In a video posted to YouTube, Dr Zhang demonstrates it by passing a rolled-up Post-It note by the box.", + "length": 104 + }, + { + "text": "Other designs are being worked on in places like London's Imperial College, Duke University and University of Texas.", + "length": 116 + }, + { + "text": "Dr Zhang's device is not the first invisibility cloak - which was made famous in the Harry Potter book and film series.", + "length": 119 + }, + { + "text": "The cloak is the work of Dr Baile Zhang, who says he developed the cloak more as a fun hobby than a serious physics breakthrough.", + "length": 129 + }, + { + "text": "' Dr Zhang, 31, is currently an assistant professor of physics and applied physics at Nanyang Technological University in Singapore.", + "length": 132 + }, + { + "text": "Scientist: The 'cloak' is the work of Singapore physics professor Baile Zhang, who showed it off at the TED2013 conference on Monday .", + "length": 134 + }, + { + "text": "Now you don't: The rolled-up Post-It note in this video cannot be seen as it passes behind the box in this demonstration posted on YouTube .", + 
"length": 140 + }, + { + "text": "Dr Zhang told the tech blog that he developed the box by attaching two pieces of calcite - a carbonate mineral that can bend light - together.", + "length": 142 + }, + { + "text": "Though Dr Zhang's device is more of a box than a cloak, the purpose is just the same - bending light around an object so that it cannot be seen.", + "length": 144 + }, + { + "text": "Dr Zhang showed off his invention, the 'macroscopic invisibility cloak at the prestigious TED2013 conference in Long Beach, California on Monday.", + "length": 145 + }, + { + "text": "Life imitating art: The technology was made famous by Harry Potter - but now researchers at various colleges have been working on their own versions .", + "length": 150 + }, + { + "text": "BoingBoing's Carla Sinclair, who spoke to Dr Zhang on Monday, writes: 'The idea came to him in 2010, and today was the first time he's shown it to a live audience.", + "length": 163 + }, + { + "text": "Sugata Mitra was awarded the conference's $1million prize for his 'Hole in the Wall Experiment,' his vision for the future of education where students teach themselves.", + "length": 168 + }, + { + "text": "A scientist has become the talk of the town at a popular tech conference this week as he unveiled his own 'invisible cloak' technology that's straight out of a Harry Potter novel.", + "length": 179 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4252515435218811 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:52.505592471Z", + "first_section_created": "2025-12-23T09:34:52.505911384Z", + "last_section_published": "2025-12-23T09:34:52.506075691Z", + "all_results_received": "2025-12-23T09:34:52.568589505Z", + "output_generated": "2025-12-23T09:34:52.56870621Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + 
"creation_time": "2025-12-23T09:34:52.505911384Z", + "publish_time": "2025-12-23T09:34:52.506075691Z", + "first_worker_start": "2025-12-23T09:34:52.50656141Z", + "last_worker_end": "2025-12-23T09:34:52.567669Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:52.50654541Z", + "start_time": "2025-12-23T09:34:52.506612612Z", + "end_time": "2025-12-23T09:34:52.506676415Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:52.506804Z", + "start_time": "2025-12-23T09:34:52.506942Z", + "end_time": "2025-12-23T09:34:52.567669Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:52.506528409Z", + "start_time": "2025-12-23T09:34:52.506600612Z", + "end_time": "2025-12-23T09:34:52.506661214Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:52.506490907Z", + "start_time": "2025-12-23T09:34:52.50656141Z", + "end_time": "2025-12-23T09:34:52.506594011Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2649, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/004e6e14b9498ae85e92cbb81adf76ac8b21f472.json b/data/output/004e6e14b9498ae85e92cbb81adf76ac8b21f472.json new file mode 100644 index 0000000..b7abff6 --- /dev/null +++ b/data/output/004e6e14b9498ae85e92cbb81adf76ac8b21f472.json @@ -0,0 +1,436 @@ +{ + "file_name": "004e6e14b9498ae85e92cbb81adf76ac8b21f472.txt", + "total_words": 1094, + "top_n_words": [ + { + "word": "the", + "count": 52 + }, + { + "word": "a", + "count": 36 + }, + { + "word": "and", + "count": 26 + }, + { + "word": "in", + "count": 25 + }, + { + "word": "of", + "count": 21 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "he", + "count": 17 + }, + { + "word": "s", + "count": 16 + }, + { + "word": "that", + "count": 16 + }, + { + "word": "umpires", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "...", + "length": 3 + }, + { + "text": "Umpires?", + "length": 8 + }, + { + "text": "\"We're human.", + "length": 13 + }, + { + "text": "John's Redmen, 1-0.", + "length": 19 + }, + { + "text": "\"We have families; we have emotions.", + "length": 36 + }, + { + "text": "(MLB does run an annual umpire camp.", + "length": 36 + }, + { + "text": "\"I just think umpiring is interesting.", + "length": 38 + }, + { + "text": "He is, literally, the king of the hill.", + "length": 39 + }, + { + "text": "He says he remembered the games vividly.", + "length": 40 + }, + { + "text": "We're not just robots they send out there.", + "length": 42 + }, + { + "text": "Almost nobody dreams of 
becoming an umpire.", + "length": 43 + }, + { + "text": "\"Now I'm always interested who the umpires are.", + "length": 47 + }, + { + "text": "More common is to be vilified for missed calls.", + "length": 47 + }, + { + "text": "\" Which is a point the umpires would appreciate.", + "length": 48 + }, + { + "text": "A little scary and maybe a major personality flaw!", + "length": 50 + }, + { + "text": "\" Pitchers get more support than umpires, of course.", + "length": 52 + }, + { + "text": "though few pay attention outside their fellow umpires.", + "length": 54 + }, + { + "text": "And no play can conclude until the umpire makes the call.", + "length": 57 + }, + { + "text": "\"They say, 'We need a base; we need an umpire; same thing.", + "length": 58 + }, + { + "text": ") Darling echoes Weber's concerns in his own field, pitching.", + "length": 61 + }, + { + "text": "there is a love for that person immediately after the process.", + "length": 62 + }, + { + "text": "\"Umpires are people, too,\" veteran ump Tim McClelland told Weber.", + "length": 65 + }, + { + "text": "I would not have said it when I was playing, but after a shutout ...", + "length": 68 + }, + { + "text": "When watching games now, the Yankees fan says, he'll focus on the umpires.", + "length": 74 + }, + { + "text": "There are countless children who dream of becoming a major-league pitcher.", + "length": 74 + }, + { + "text": "\"That's really what [being an umpire] is about -- is being in charge,\" he said.", + "length": 79 + }, + { + "text": "(CNN) -- No play can begin in a baseball game until the pitcher throws the ball.", + "length": 80 + }, + { + "text": "\" Umpires need that presence because they're often baseball's most disrespected men.", + "length": 84 + }, + { + "text": "\"It would be like training for the marathon and never running more than 5 miles,\" he says.", + "length": 90 + }, + { + "text": "Though time has dulled the pain, \"I think he lives with [that call] every day,\" Weber said.", + 
"length": 91 + }, + { + "text": "\" Weber says his time with umpires has made him much more sympathetic to their judgmental tasks.", + "length": 96 + }, + { + "text": "(\"The owners basically see them like bases,\" former baseball Commissioner Fay Vincent told Weber.", + "length": 97 + }, + { + "text": "\"Identifying and preserving million-dollar arms are [the purview] of doctors, not baseball people.", + "length": 98 + }, + { + "text": "\"If there's anything that characterizes the major league umpire, it's that special kind of chutzpah.", + "length": 100 + }, + { + "text": "People hate 'em, and they somehow perceive of umpiring as a flaw in the game, but I don't,\" he said.", + "length": 100 + }, + { + "text": "\"Of course, I knew this was a fallacy so I decided to write about the travails of major league pitchers.", + "length": 104 + }, + { + "text": "\"We're not just robots they send out there,\" umpire Tim McClelland (2nd from L) told author Bruce Weber.", + "length": 104 + }, + { + "text": "\" That leaves them at a disadvantage when they have to go deeper into a game or cope with a tough inning, he says.", + "length": 114 + }, + { + "text": "\" Umpires, on the other hand, rarely get written about at all -- in fact, they're often treated as less than human.", + "length": 115 + }, + { + "text": "\"When a dreadful thing happens to you in front of so many people and you become famous for it, it must be devastating.", + "length": 118 + }, + { + "text": "\"I definitely watched tapes and read box scores, but I was very clear on almost all the minutiae of the good old days.", + "length": 118 + }, + { + "text": "Throwing a ball 95 mph to tin cup-sized quadrants sounds pretty difficult to me, and I wanted to express this to the reader.", + "length": 124 + }, + { + "text": "You did something together that could not have been done alone, and nobody can understand what you went through to get there.", + "length": 125 + }, + { + "text": "Both jobs require a great deal of 
command, neither gets enough training, and both are often disrespected by others in the game.", + "length": 127 + }, + { + "text": "Yet these figures -- the man on the mound and the men who stand in judgment -- are vastly different in importance to the average fan.", + "length": 133 + }, + { + "text": "' \") Weber found a fraternity (and they are almost all men) much like cops or soldiers: tight-lipped believers in baseball law and order.", + "length": 137 + }, + { + "text": "Darling observes that the relationship between a pitcher and his catcher during a well-pitched game \"is one of sport's most beautiful dances.", + "length": 141 + }, + { + "text": "\"Within baseball circles there is a common baseball axiom, 'If pitchers weren't so stupid, hitters would never get a hit,' \" he said in an e-mail.", + "length": 146 + }, + { + "text": "\" Darling's book is a chronicle of pitchers' thought processes, using individual innings from his pitching or broadcasting career to make his point.", + "length": 148 + }, + { + "text": "And yet Major League Baseball doesn't participate in umpire training or development, entrusting it to two umpire-run private schools, Weber observes.", + "length": 149 + }, + { + "text": "Somebody says, 'Kill the umpire,' and people go, 'Heh, heh, that's funny,' but in order to do that, you have to disassociate the umpire from the person.", + "length": 152 + }, + { + "text": "What he found is that what looks so obvious on television at home is often a challenge on the field, a matter not just of eyesight but positioning, rule-book knowledge and basic guts.", + "length": 183 + }, + { + "text": "In these days of strict pitch counts and injury concern, pitchers are \"undertrain[ed],\" he says, noting that top draft choices climb the ranks \"never allowed to throw more than 110 pitches.", + "length": 189 + }, + { + "text": "Weber devotes a moving passage in his book to a conversation with the retired Don Denkinger, a 29-year veteran remembered by fans 
(if he's remembered at all) for a wrong call in the 1985 World Series.", + "length": 200 + }, + { + "text": "Indeed, despite a library of books by and about pitchers (Jim Bouton's \"Ball Four,\" Jim Brosnan's \"The Long Season,\" Christy Mathewson's \"Pitching in a Pinch\"), Darling said he believes that people still don't understand what it takes to stand on that mound.", + "length": 258 + }, + { + "text": "And yet the positions share a number of similarities, according to two new books: \"As They See 'Em\" (Scribner), by New York Times writer Bruce Weber, and \"The Complete Game\" (Knopf), by former major-league pitcher (and current New York Mets broadcaster) Ron Darling.", + "length": 266 + }, + { + "text": "Aside from the vitriol they face -- the managers kicking dirt, the spectators yelling \"Kill the ump\" -- they're second-guessed by broadcasters and barely tolerated by management, as Weber reveals in detailing the episodes preceding and following the 1999 umpires' strike.", + "length": 271 + }, + { + "text": "Weber immersed himself in the \"land of umpires,\" as the book's subtitle calls it, attending umpiring school, calling games at various levels of pro ball and talking with those who were willing -- including the legendary Doug Harvey, who was called \"God\" for his imperious demeanor.", + "length": 281 + }, + { + "text": "He talks about panic overtaking a pitcher, as it did for Darling in a 1984 game in which he got pasted by the Cubs; he also addresses the rush of pitching in a World Series game and -- in a treat for baseball fans -- goes over the extra innings in perhaps the most famous college baseball game ever, a 1981 extra-inning contest that Darling's Yale Bulldogs lost to Frank Viola's St.", + "length": 382 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5589116811752319 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:53.006247708Z", + "first_section_created": 
"2025-12-23T09:34:53.007810671Z", + "last_section_published": "2025-12-23T09:34:53.008181186Z", + "all_results_received": "2025-12-23T09:34:53.123448122Z", + "output_generated": "2025-12-23T09:34:53.123679531Z", + "total_processing_time_ms": 117, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 115, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:53.007810671Z", + "publish_time": "2025-12-23T09:34:53.00803948Z", + "first_worker_start": "2025-12-23T09:34:53.008634904Z", + "last_worker_end": "2025-12-23T09:34:53.122485Z", + "total_journey_time_ms": 114, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:53.008647005Z", + "start_time": "2025-12-23T09:34:53.008737908Z", + "end_time": "2025-12-23T09:34:53.008849813Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:53.008905Z", + "start_time": "2025-12-23T09:34:53.009043Z", + "end_time": "2025-12-23T09:34:53.122485Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 113 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:53.008685306Z", + "start_time": "2025-12-23T09:34:53.008749209Z", + "end_time": "2025-12-23T09:34:53.008868614Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:53.008553601Z", + "start_time": "2025-12-23T09:34:53.008634904Z", + "end_time": "2025-12-23T09:34:53.008683006Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:53.008075482Z", + "publish_time": "2025-12-23T09:34:53.008181186Z", + "first_worker_start": "2025-12-23T09:34:53.008617203Z", + "last_worker_end": "2025-12-23T09:34:53.102501Z", + "total_journey_time_ms": 94, + "worker_timings": { + "sentencesort": { + 
"worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:53.008748809Z", + "start_time": "2025-12-23T09:34:53.00878291Z", + "end_time": "2025-12-23T09:34:53.008810311Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:53.008998Z", + "start_time": "2025-12-23T09:34:53.009128Z", + "end_time": "2025-12-23T09:34:53.102501Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 93 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:53.008694007Z", + "start_time": "2025-12-23T09:34:53.008737908Z", + "end_time": "2025-12-23T09:34:53.00878171Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:53.008546901Z", + "start_time": "2025-12-23T09:34:53.008617203Z", + "end_time": "2025-12-23T09:34:53.008632604Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 206, + "min_processing_ms": 93, + "max_processing_ms": 113, + "avg_processing_ms": 103, + "median_processing_ms": 113, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3116, + "slowest_section_id": 0, + "slowest_section_time_ms": 114 + } +} diff --git a/data/output/004e6e81e7efd6c40259a0b50631e08207de74bc.json b/data/output/004e6e81e7efd6c40259a0b50631e08207de74bc.json new file mode 100644 index 0000000..be2b150 --- /dev/null +++ b/data/output/004e6e81e7efd6c40259a0b50631e08207de74bc.json @@ -0,0 +1,612 @@ +{ + "file_name": "004e6e81e7efd6c40259a0b50631e08207de74bc.txt", + "total_words": 1035, + "top_n_words": [ + { + "word": "the", + "count": 47 + }, + { + "word": "in", + "count": 36 + }, + { + "word": "of", + "count": 33 + }, + { + "word": "to", + "count": 31 + }, + { + "word": "and", + "count": 28 + }, + { + "word": "women", + "count": 20 + }, + { + "word": "more", + "count": 18 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "are", + "count": 14 + }, + { + "word": "have", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "A .", + "length": 3 + }, + { + "text": "In .", + "length": 4 + }, + { + "text": "NHS.", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "She .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "mothers.", + "length": 8 + }, + { + "text": "Louise .", + "length": 8 + }, + { + "text": "Health .", + "length": 8 + }, + { + "text": "question.", + "length": 9 + }, + { + "text": "Eve 2010.", + "length": 9 + }, + { + "text": "‘This .", + "length": 9 + }, + { + "text": "Earlier .", + "length": 9 + }, + { + "text": "‘This .", + "length": 9 + }, + { + "text": "‘Every .", + "length": 10 + }, + { + "text": "age of 44.", + "length": 10 + }, + { + "text": "In 2007, .", + "length": 10 + }, + { + "text": "’ Last .", + "length": 10 + }, + { + "text": "Josephine .", + "length": 11 + }, + { + "text": "‘Because .", + "length": 12 + }, + { 
+ "text": "need more support.", + "length": 18 + }, + { + "text": "childbearing years.", + "length": 19 + }, + { + "text": "’ The Royal College .", + "length": 23 + }, + { + "text": "In 2000 the number was 44.", + "length": 26 + }, + { + "text": "another 5,000 in training.", + "length": 26 + }, + { + "text": "pressure is put on the NHS.", + "length": 27 + }, + { + "text": "mothers who have turned 40.", + "length": 27 + }, + { + "text": "It means one in 25 are to .", + "length": 27 + }, + { + "text": "Having a child at such an .", + "length": 27 + }, + { + "text": "50, up by a third in a year.", + "length": 28 + }, + { + "text": "enjoying being a grandparent.", + "length": 29 + }, + { + "text": "Older women are more likely to have .", + "length": 37 + }, + { + "text": "Midwives also warn that because older .", + "length": 39 + }, + { + "text": "abnormalities such as Down’s Syndrome.", + "length": 40 + }, + { + "text": "babies into the world past the age of 40.", + "length": 41 + }, + { + "text": "cent from 26,419 in 2008 to 29,994 in 2012.", + "length": 43 + }, + { + "text": "mother, the higher the risk of complications.", + "length": 45 + }, + { + "text": "likely to be born with genetic abnormalities.", + "length": 45 + }, + { + "text": "change in natural menopause in the past five years.", + "length": 51 + }, + { + "text": "A quarter believe women should stop trying to bring .", + "length": 53 + }, + { + "text": "care, advice and support they need in their pregnancy.", + "length": 54 + }, + { + "text": "Their babies also face a greater risk of ill-health or .", + "length": 56 + }, + { + "text": "the current age limit of 42 for IVF on the NHS is too old.", + "length": 58 + }, + { + "text": "the age of 50, after travelling to Cyprus for IVF treatment.", + "length": 60 + }, + { + "text": "at providing for women who want to be mothers later in life.", + "length": 60 + }, + { + "text": "cope with the changing patterns of women delaying motherhood.", + "length": 61 + }, 
+ { + "text": "East  Renfrewshire, around one in three mothers  was over 35.", + "length": 63 + }, + { + "text": "should receive IVF to help them conceive beyond their natural .", + "length": 63 + }, + { + "text": "In 2012, there were 154 babies born to mothers over the age of .", + "length": 64 + }, + { + "text": "such high blood pressure, diabetes and problems with the placenta.", + "length": 66 + }, + { + "text": "and these women tend to have more complications than younger women.", + "length": 67 + }, + { + "text": "is more pronounced as women have babies at increasingly greater ages.", + "length": 69 + }, + { + "text": "Happy: Carole Hobson with her two-year-old twins Freida and Matthew .", + "length": 69 + }, + { + "text": "of these risks it is crucial that these women get the right level of .", + "length": 70 + }, + { + "text": "Quintavalle of Comment on Reproductive Ethics said: ‘The older the .", + "length": 70 + }, + { + "text": "miscarriages and ectopic pregnancies while their  children are more .", + "length": 70 + }, + { + "text": "old age also has implications on the time these mothers will have to .", + "length": 70 + }, + { + "text": "single mother, now 61, spent more than £20,000 to have children and .", + "length": 70 + }, + { + "text": "ministers revealed the sharp rise in older mothers in a parliamentary .", + "length": 71 + }, + { + "text": "some areas of Britain, including Windsor and Maidenhead, Brighton and .", + "length": 71 + }, + { + "text": "doubling of births to over-fifties isn’t something which would have .", + "length": 71 + }, + { + "text": "of Obstetricians and Gynaecologists has warned that those over the age .", + "length": 72 + }, + { + "text": "giving birth now than 20 years ago and research shows that older women .", + "length": 72 + }, + { + "text": "twins Frieda and Matthew were born by emergency Caesarean on Christmas .", + "length": 72 + }, + { + "text": "warned that without more midwives the health service would struggle 
to .", + "length": 72 + }, + { + "text": "mothers and their offspring often require higher levels of care, extra .", + "length": 72 + }, + { + "text": "is one of the reasons behind the RCM’s call for more midwives in the .", + "length": 72 + }, + { + "text": "survey of more than 2,000 people across the UK found 31 per cent think .", + "length": 72 + }, + { + "text": "number of births to mothers aged 40 and over has also risen, up 13 per .", + "length": 72 + }, + { + "text": "ectopic pregnancies and genetic problems in the child and other issues .", + "length": 72 + }, + { + "text": "qualified barrister from Kent, said: ‘In Britain we need to be better .", + "length": 73 + }, + { + "text": "month a survey found almost three-quarters of people do not think women .", + "length": 73 + }, + { + "text": "‘There are an increasing numbers of older women who are having babies .", + "length": 73 + }, + { + "text": "woman, no matter what her age, deserves the best possible care and this .", + "length": 73 + }, + { + "text": "this year Shameless actress Tina Malone gave birth to daughter Flame at .", + "length": 73 + }, + { + "text": "have a higher risk of developing complications during pregnancy and may .", + "length": 73 + }, + { + "text": "of 40 are up to three times more likely to lose their baby than younger .", + "length": 73 + }, + { + "text": "Desperate Housewives star Marcia Cross had twin daughters in 2007 at the .", + "length": 74 + }, + { + "text": "Silverton, director for midwifery at the Royal College of Midwives said: .", + "length": 74 + }, + { + "text": "Older mothers are more likely to have increased rates of miscarriage and .", + "length": 74 + }, + { + "text": "Department of Health spokesman said: ‘We know that more older women are .", + "length": 75 + }, + { + "text": "happened naturally, as there’s no way there has been that significant a .", + "length": 75 + }, + { + "text": "It is an indescribable joy, but it’s non-stop – it is like a full-time job.", 
+ "length": 79 + }, + { + "text": "The number of women over 50 who are having babies has more than doubled in five years.", + "length": 86 + }, + { + "text": "Nine weeks premature and each weighing 3lb 3oz, they spent two months in neo-natal care.", + "length": 88 + }, + { + "text": "The number of women aged 50 and over who gave birth in 2012 hit 154, up from 69 in 2008 .", + "length": 89 + }, + { + "text": "Every week around three children are born to a mother in her fifties, the latest figures show.", + "length": 94 + }, + { + "text": "The figure has more than doubled since 2008 when there were 69 births to women aged 50 and over.", + "length": 96 + }, + { + "text": "Marcia Cross, pitured here in Desperate Housewives, had twin daughters in 2007 at the age of 44 .", + "length": 97 + }, + { + "text": "‘This will ensure that every mother has a named midwife who is responsible for personalised care.", + "length": 99 + }, + { + "text": "After four failed IVF attempts in Ukraine and Cyprus, donor embryos were implanted at a clinic in Mumbai.", + "length": 105 + }, + { + "text": "Carole Hobson became Britain’s oldest mother of twins at 58 after conceiving through IVFat an Indian clinic.", + "length": 110 + }, + { + "text": "Even someone who has a child aged over the age 35 is considered an ‘older mother’ by  medical professionals.", + "length": 113 + }, + { + "text": "Changing medical advice and advances in IVF treatment also mean more are willing to risk delaying having children.", + "length": 114 + }, + { + "text": "Around 20 per cent of babies are born to women aged 35 or older, the highest proportion since records began in 1938.", + "length": 116 + }, + { + "text": "The trend is the result of women choosing to concentrate on their careers rather than settling down to have a family.", + "length": 117 + }, + { + "text": "The dramatic increase in births to older women will deepen fears over the health of both the mothers and their babies.", + "length": 118 + }, + { + 
"text": "Other women are moving into new relationships later in life and are choosing to have more children with their new partner.", + "length": 122 + }, + { + "text": "At the same time, only 23 per cent of births were to women aged under 25 in 2012, down from almost half in the early 1970s.", + "length": 123 + }, + { + "text": "’ Shameless actress Tina Malone travelled to Cyprus for IVF treatment and conceived her daughter, Flame, with donor eggs, giving birth aged 50 .", + "length": 146 + }, + { + "text": "’ Treatment: Miss Hobson, now 61, spent more than £20,000 to have children and twins Frieda and Matthew were born by emergency Caesarean on Christmas Eve 2010 .", + "length": 163 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.45922571420669556 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:53.508944027Z", + "first_section_created": "2025-12-23T09:34:53.509289441Z", + "last_section_published": "2025-12-23T09:34:53.509556352Z", + "all_results_received": "2025-12-23T09:34:53.609539673Z", + "output_generated": "2025-12-23T09:34:53.609725281Z", + "total_processing_time_ms": 100, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 99, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:53.509289441Z", + "publish_time": "2025-12-23T09:34:53.509494849Z", + "first_worker_start": "2025-12-23T09:34:53.510136475Z", + "last_worker_end": "2025-12-23T09:34:53.576552Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:53.510141075Z", + "start_time": "2025-12-23T09:34:53.510224879Z", + "end_time": "2025-12-23T09:34:53.510359584Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:53.510432Z", + "start_time": "2025-12-23T09:34:53.51055Z", + "end_time": 
"2025-12-23T09:34:53.576552Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:53.510127775Z", + "start_time": "2025-12-23T09:34:53.510230879Z", + "end_time": "2025-12-23T09:34:53.510330183Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:53.510051372Z", + "start_time": "2025-12-23T09:34:53.510136475Z", + "end_time": "2025-12-23T09:34:53.510189477Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:53.50952285Z", + "publish_time": "2025-12-23T09:34:53.509556352Z", + "first_worker_start": "2025-12-23T09:34:53.510088773Z", + "last_worker_end": "2025-12-23T09:34:53.608708Z", + "total_journey_time_ms": 99, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:53.510023771Z", + "start_time": "2025-12-23T09:34:53.510088773Z", + "end_time": "2025-12-23T09:34:53.510113374Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:53.51032Z", + "start_time": "2025-12-23T09:34:53.51049Z", + "end_time": "2025-12-23T09:34:53.608708Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 98 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:53.510121575Z", + "start_time": "2025-12-23T09:34:53.510165076Z", + "end_time": "2025-12-23T09:34:53.510198378Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:53.510119874Z", + "start_time": "2025-12-23T09:34:53.510151276Z", + "end_time": "2025-12-23T09:34:53.510186077Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 
2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 164, + "min_processing_ms": 66, + "max_processing_ms": 98, + "avg_processing_ms": 82, + "median_processing_ms": 98, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2995, + "slowest_section_id": 1, + "slowest_section_time_ms": 99 + } +} diff --git a/data/output/004e6e935ea530b0992a89fd1307f7f41f4a234d.json b/data/output/004e6e935ea530b0992a89fd1307f7f41f4a234d.json new file mode 100644 index 0000000..ed4d8ba --- /dev/null +++ b/data/output/004e6e935ea530b0992a89fd1307f7f41f4a234d.json @@ -0,0 +1,282 @@ +{ + "file_name": "004e6e935ea530b0992a89fd1307f7f41f4a234d.txt", + "total_words": 681, + "top_n_words": [ + { + "word": "the", + "count": 46 + }, + { + "word": "of", + "count": 23 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "obama", + "count": 14 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "that", + "count": 11 + }, + { + "word": "for", + "count": 10 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "religious", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "Barry Lynn, said in a statement.", + "length": 32 + }, + { + "text": 
"\" Senate passes LGBT anti-discrimination bill .", + "length": 47 + }, + { + "text": "\"The ones hurt will be the most vulnerable in our society.", + "length": 58 + }, + { + "text": "\" Supreme Court rules against Obama in contraception case .", + "length": 59 + }, + { + "text": "The Senate passed a bill barring LGBT discrimination in the fall.", + "length": 65 + }, + { + "text": "But he suggested that religious groups could still rely on the 2002 order.", + "length": 74 + }, + { + "text": "Despite calls from religious leaders, faith-based groups will not be exempt.", + "length": 76 + }, + { + "text": "\"No forms of discrimination should be supported with the taxpayer dime, period.", + "length": 79 + }, + { + "text": "It said he made the \"right call\" for not tagging any religious exemptions to the document.", + "length": 90 + }, + { + "text": "Bush that allows religious groups to weigh prospective employees' faith in hiring decisions.", + "length": 92 + }, + { + "text": "But on the federal legislative level, LGBT groups have struggled to enact similar legislation.", + "length": 94 + }, + { + "text": "But Obama's signature on Monday did not touch a 2002 executive order signed by President George W.", + "length": 98 + }, + { + "text": "The action is not the first time Obama has used his presidential powers to benefit the LGBT community.", + "length": 102 + }, + { + "text": "\"I believe the administration has left open a path that religious groups can work with,\" Schneck said.", + "length": 102 + }, + { + "text": "This gave some opponents of the order hope that they could continue to consider sexual orientation in hiring decisions.", + "length": 119 + }, + { + "text": "Obama also noted that a majority of Fortune 500 companies have policies in place against discrimination based on sexual orientation.", + "length": 132 + }, + { + "text": "But the measure, which exempted religious groups from the would-be-law, did not make it to the House floor where Republicans 
opposed it.", + "length": 136 + }, + { + "text": "In 2010, he signed an order extending benefits to same-sex partners of executive branch employees already provided to opposite-sex partners.", + "length": 140 + }, + { + "text": "And attendees greeted Obama's call to continue applying pressure to \"resolve this problem once and for all\" with one resounding word: \"Amen.", + "length": 140 + }, + { + "text": "\" Rea Carey, executive director of the National Gay and Lesbian Task Force, was in the room as Obama signed the order and said it was an emotional moment.", + "length": 154 + }, + { + "text": "\"Faith-based groups that tap the public purse should play by the same rules as everyone else and not expect special treatment,\" the group's executive director, Rev.", + "length": 164 + }, + { + "text": "President Barack Obama signed an executive order Monday banning federal contractors from discriminating against employees on the basis of sexual orientation or gender identity.", + "length": 176 + }, + { + "text": "\"There are now millions of LGBT people and their families who are just going to sleep a little bit easier tonight knowing that they can't be fired from their jobs as federal contractors,\" she said.", + "length": 197 + }, + { + "text": "Gay federal workers are already protected from workplace discrimination by a Clinton-era order and Obama's action extended the protections to shield workers from gender identity-based discrimination.", + "length": 199 + }, + { + "text": "Americans United for Separation of Church and State, which joined a coalition of nearly 100 civil rights and LGBT groups urging Obama to reject calls for a religious exemption, thanked him for taking action.", + "length": 207 + }, + { + "text": "\"Thanks to your passion and advocacy and the irrefutable rightness of your cause, our government -- a government of the people, by the people and for the people -- will become just a little bit fairer,\" Obama said.", + "length": 214 + }, + { + 
"text": "Russell Moore, president of the Ethics \u0026 Religious Liberty Commission of the Southern Baptist Convention, had stronger words for Obama and worried that the Bush-era executive order would leave out some faith-based groups.", + "length": 221 + }, + { + "text": "One of those opponents, Stephen Schneck, director of the Institute for Policy Research \u0026 Catholic Studies at The Catholic University of America, said he was disappointed by Obama's decision regarding the religious exemption.", + "length": 224 + }, + { + "text": "\" Obama's executive action extends protections against sexual-based discrimination to employees of federal contractors operating outside of the 21 states and the District of Columbia that enacted their own non-discrimination legislation.", + "length": 237 + }, + { + "text": "\"While we don't know the full implications of this executive order, I am disappointed that this administration persistently violates the freedom of conscience for religious organizations that provide necessary relief for the poor and endangered,\" Moore said.", + "length": 258 + }, + { + "text": "During the ceremony, which comes 50 years after President Lyndon Johnson signed the Civil Rights Act of 1964, Obama also recalled the history of executive actions and legislation to ban discrimination in the workplace and \"make sure we the people applies to all the people.", + "length": 273 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5636561512947083 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:54.010302892Z", + "first_section_created": "2025-12-23T09:34:54.010606605Z", + "last_section_published": "2025-12-23T09:34:54.010858515Z", + "all_results_received": "2025-12-23T09:34:54.067359687Z", + "output_generated": "2025-12-23T09:34:54.067518494Z", + "total_processing_time_ms": 57, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 56, + "aggregation_wait_time_ms": 0, + "section_journeys": [ 
+ { + "section_id": 0, + "creation_time": "2025-12-23T09:34:54.010606605Z", + "publish_time": "2025-12-23T09:34:54.010858515Z", + "first_worker_start": "2025-12-23T09:34:54.011374736Z", + "last_worker_end": "2025-12-23T09:34:54.066443Z", + "total_journey_time_ms": 55, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:54.011376536Z", + "start_time": "2025-12-23T09:34:54.011464539Z", + "end_time": "2025-12-23T09:34:54.011553643Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:54.011564Z", + "start_time": "2025-12-23T09:34:54.011726Z", + "end_time": "2025-12-23T09:34:54.066443Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 54 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:54.011301333Z", + "start_time": "2025-12-23T09:34:54.011374736Z", + "end_time": "2025-12-23T09:34:54.011458839Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:54.011313833Z", + "start_time": "2025-12-23T09:34:54.011381436Z", + "end_time": "2025-12-23T09:34:54.011416537Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 54, + "min_processing_ms": 54, + "max_processing_ms": 54, + "avg_processing_ms": 54, + "median_processing_ms": 54, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + 
"max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4285, + "slowest_section_id": 0, + "slowest_section_time_ms": 55 + } +} diff --git a/data/output/004e826261cd68380e666716704c3d4b8b625627.json b/data/output/004e826261cd68380e666716704c3d4b8b625627.json new file mode 100644 index 0000000..762e6e1 --- /dev/null +++ b/data/output/004e826261cd68380e666716704c3d4b8b625627.json @@ -0,0 +1,402 @@ +{ + "file_name": "004e826261cd68380e666716704c3d4b8b625627.txt", + "total_words": 647, + "top_n_words": [ + { + "word": "the", + "count": 52 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "forecast", + "count": 15 + }, + { + "word": "on", + "count": 14 + }, + { + "word": "shipping", + "count": 13 + }, + { + "word": "at", + "count": 12 + }, + { + "word": "radio", + "count": 12 + }, + { + "word": "4", + "count": 10 + }, + { + "word": "5", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "40am.", + "length": 5 + }, + { + "text": "40am.", + "length": 5 + }, + { + "text": "20am .", + "length": 6 + }, + { + "text": "It's 5.", + "length": 7 + }, + { + "text": "48am, 5.", + "length": 8 + }, + { + "text": "48am, 5.", + "length": 8 + }, + { + "text": "20am, 12.", + "length": 9 + }, + { + "text": "20am, 12.", + "length": 9 + }, + { + "text": "01pm and 5.", + "length": 11 + }, + { + "text": "01pm and 5.", + "length": 11 + }, + { + "text": "No shipping .", + "length": 13 + }, + { + "text": "Our apologies.", + "length": 14 + }, + { + "text": "'Nation in peril?", + "length": 17 + }, + { + "text": "54pm - on BBC Radio 4.", + "length": 22 
+ }, + { + "text": "sound of the broadcast.", + "length": 23 + }, + { + "text": "Bad start to a Friday'.", + "length": 23 + }, + { + "text": "'We're sorry about that.", + "length": 24 + }, + { + "text": "20am each day on Radio 4.", + "length": 25 + }, + { + "text": "Kirsty Connell said: 'Eep.", + "length": 26 + }, + { + "text": "woken up at the wrong time.", + "length": 27 + }, + { + "text": "forecast, no #bbcnewsbriefing.", + "length": 30 + }, + { + "text": "Broadcast four times a day - at 12.", + "length": 35 + }, + { + "text": "'We apologise for the inconvenience.", + "length": 36 + }, + { + "text": "The problem continued on into the 5.", + "length": 36 + }, + { + "text": "Listeners quickly took to Twitter to .", + "length": 38 + }, + { + "text": "' Graham Patterson said: 'Confused by .", + "length": 39 + }, + { + "text": "She said: 'Your listening to BBC Radio 4.", + "length": 41 + }, + { + "text": "' Mark Johnston tweeted: 'Nation in peril?", + "length": 42 + }, + { + "text": "' Jordan Rowland added: 'No shipping forecast?", + "length": 46 + }, + { + "text": "The service is produced by the Met Office on .", + "length": 46 + }, + { + "text": "Expect Farage to blame Brussels within the hour'.", + "length": 49 + }, + { + "text": "Isn’t that the sign of impending nuclear armageddon?", + "length": 54 + }, + { + "text": "40am and we've been broadcasting happily News Briefing.", + "length": 55 + }, + { + "text": "Despite the fact the terminology of the sea areas can .", + "length": 55 + }, + { + "text": "#radio4 failing to start broadcasting at 05:25 this morning.", + "length": 60 + }, + { + "text": "40am when presenter Kathy Clugston apologised for the glitch.", + "length": 61 + }, + { + "text": "#BBC radio fails to broadcast shipping forecast this morning.", + "length": 61 + }, + { + "text": "appear nonsensical to the uninitiated, it attracts a far wider .", + "length": 64 + }, + { + "text": "audience because of the distinctive - and some would say hypnotic - .", + 
"length": 69 + }, + { + "text": "The shipping forecast didn’t get broadcast on @BBCRadio4 this morning.", + "length": 72 + }, + { + "text": "express their bewilderment at the mix-up, with some left thinking they had .", + "length": 76 + }, + { + "text": "The BBC radio service, a trusted national institution, normally broadcasts at 5.", + "length": 80 + }, + { + "text": "If UK submarinies don't get shipping forecast, don't they launch nuclear attack?", + "length": 80 + }, + { + "text": "' Listeners quickly took to Twitter to express their bewilderment at the mix-up .", + "length": 81 + }, + { + "text": "Gale warnings were also issued at each station by hoisting flags to warn sailors.", + "length": 81 + }, + { + "text": "30am airing of News Briefing until it finally cut back to the Radio 4 programme at 5.", + "length": 85 + }, + { + "text": "54pm - it provides weather reports and updates on the state of the waters around Britain.", + "length": 89 + }, + { + "text": "behalf of the Maritime and Coastguard Agency and is crucial for fishermen and shipping companies.", + "length": 97 + }, + { + "text": "' The forecast, which provides reports on the seas around Britain, did eventually air at around 6.", + "length": 98 + }, + { + "text": "Glitch: The shipping forecast, which provides reports on the seas around Britain, should have aired at 5.", + "length": 105 + }, + { + "text": "Hitting rocks: BBC listeners had their morning upended when the shipping forecast failed to air on Radio 4 .", + "length": 108 + }, + { + "text": "Britain's morning was thrown into disarray today when the ever-reliable shipping forecast failed to air as expected.", + "length": 116 + }, + { + "text": "'But unfortunately due to a technical error, you've not been hearing us, so you have been hearing the BBC World Service.", + "length": 120 + }, + { + "text": "Today the forecast is produced by the Met Office for the Maritime and Coastguard Agency and airs four times daily - at 12.", + "length": 122 + 
}, + { + "text": "Nancetron said: 'The world service didn't hand to radio 4 so the shipping forecast didn't come on so I almost didn't leave the house.", + "length": 133 + }, + { + "text": "We're looking into it and we'll try and broadcast the shipping forecast for long-wave listeners during the course of the Today programme.", + "length": 137 + }, + { + "text": "But a technical error meant the BBC's World Service was played instead - making it the first time the forecast has failed to air since 1924.", + "length": 140 + }, + { + "text": "'The Radio 4 announcer explained the situation on air to listeners and the Today programme pointed out that the Shipping Forecast was broadcast on Radio 4 LW at 6.", + "length": 163 + }, + { + "text": "The tragedy shocked the nation and a plan was put in place by Vice-Admiral Robert FitzRoy to establish 13 instrument stations to relay readings via telegraph to London.", + "length": 168 + }, + { + "text": "The shipping forecast was created in 1861 after the deaths of 450 people on board the steam clipper Royal Charter which crashed into rocks off the North Wales coast in a hurricane.", + "length": 180 + }, + { + "text": "A Radio 4 spokesman told MailOnline: 'Unfortunately our usual switch from BBC World Service, which is broadcast on Radio 4 overnight, didn't go as planned and was delayed by around 20 minutes.", + "length": 192 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8677445650100708 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:54.511652657Z", + "first_section_created": "2025-12-23T09:34:54.511923868Z", + "last_section_published": "2025-12-23T09:34:54.512200679Z", + "all_results_received": "2025-12-23T09:34:54.572236394Z", + "output_generated": "2025-12-23T09:34:54.572451403Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + 
"creation_time": "2025-12-23T09:34:54.511923868Z", + "publish_time": "2025-12-23T09:34:54.512200679Z", + "first_worker_start": "2025-12-23T09:34:54.512665698Z", + "last_worker_end": "2025-12-23T09:34:54.571285Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:54.512669698Z", + "start_time": "2025-12-23T09:34:54.512742801Z", + "end_time": "2025-12-23T09:34:54.512816904Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:54.512909Z", + "start_time": "2025-12-23T09:34:54.513037Z", + "end_time": "2025-12-23T09:34:54.571285Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:54.512598595Z", + "start_time": "2025-12-23T09:34:54.512665698Z", + "end_time": "2025-12-23T09:34:54.512747801Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:54.512599895Z", + "start_time": "2025-12-23T09:34:54.512671398Z", + "end_time": "2025-12-23T09:34:54.5127079Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3774, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git a/data/output/004e9b3afeef210b8ed0d4778a44bd17c6e5d7b8.json b/data/output/004e9b3afeef210b8ed0d4778a44bd17c6e5d7b8.json new file mode 100644 index 0000000..c0ad3c6 --- /dev/null +++ b/data/output/004e9b3afeef210b8ed0d4778a44bd17c6e5d7b8.json @@ -0,0 +1,266 @@ +{ + "file_name": "004e9b3afeef210b8ed0d4778a44bd17c6e5d7b8.txt", + "total_words": 501, + "top_n_words": [ + { + "word": "the", + "count": 29 + }, + { + "word": "i", + "count": 17 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "it", + "count": 12 + }, + { + "word": "that", + "count": 9 + }, + { + "word": "a", + "count": 8 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "was", + "count": 8 + }, + { + "word": "but", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "I have enjoyed the past six months, however.", + "length": 44 + }, + { + "text": "I also enjoyed coaching the Under 21s at times.", + "length": 47 + }, + { + "text": "It was a surprise how quickly it all turned out.", + "length": 48 + }, + { + "text": "Last time that happened he was on crutches for 12 weeks.", + "length": 56 + }, + { + "text": "Each week another team or manager seems to be under pressure.", + "length": 61 + }, + { + "text": "There are a good crop of players coming through at QPR from there.", + "length": 66 + }, + { + "text": "There were some good people at the club and I enjoyed my time there.", + "length": 68 + }, + { + "text": "My hope for the club is that they 
do now stay up under a new manager.", + "length": 69 + }, + { + "text": "Everyone on the coaching staff was aware that we needed better results.", + "length": 71 + }, + { + "text": "We were confident we could make sure it wasn’t us by the end of the season.", + "length": 77 + }, + { + "text": "The 57-year-old coach joined Queens Park Rangers' coaching team in August 2014 .", + "length": 80 + }, + { + "text": "Clearly as manager there has to be a degree of distance between you and the team.", + "length": 81 + }, + { + "text": "You’re always the man who has dropped half of the squad, so it’s impossible to be too close.", + "length": 96 + }, + { + "text": "But that wasn’t the case as a coach and I found I liked being that much closer to the players.", + "length": 96 + }, + { + "text": "Glenn Hoddle decided to leave QPR following Harry Redknapp's resignation from the west London outfit .", + "length": 102 + }, + { + "text": "Some offers have come in but for now I’ll take stock and work out what is best for me and my family.", + "length": 102 + }, + { + "text": "Hoddle enjoyed his time at QPR but it was only right to leave the club following Redknapp's departure .", + "length": 103 + }, + { + "text": "Harry brought me in there to do a specific job and, once he had gone, there seemed no point in staying.", + "length": 103 + }, + { + "text": "Hoddle (far left, pictured at the World Cup) will focus on his punditry work before deciding on his future .", + "length": 108 + }, + { + "text": "The club tried to bring in the right players but, as deadline day approached, it simply became harder and harder.", + "length": 113 + }, + { + "text": "I’ve heard all the theories as to why he left but I don’t believe it was related to the January transfer window.", + "length": 116 + }, + { + "text": "Harry has been in pain for some time with his knees and the news that he needed an operation was a real blow to him.", + "length": 116 + }, + { + "text": "But equally it looked as though 
fortunes were going to ebb and flow throughout the season among the teams towards the bottom.", + "length": 125 + }, + { + "text": "I’ve never been in a pure coaching role before and, though I was apprehensive when I went into it, I discovered that I really enjoyed it.", + "length": 139 + }, + { + "text": "As for the future, I have some business interests and I enjoy writing this column and of course I have my punditry with Sky Sports and ITV.", + "length": 139 + }, + { + "text": "That said, we were in and out of the relegation zone and I reckon if Harry had been able to stay, we would have climbed out of the bottom three once again.", + "length": 155 + }, + { + "text": "It was sad to leave Queens Park Rangers this week but once Harry Redknapp had decided that he couldn’t go on, it felt like the right thing to leave at the same time.", + "length": 167 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8299628496170044 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:55.012992822Z", + "first_section_created": "2025-12-23T09:34:55.013329935Z", + "last_section_published": "2025-12-23T09:34:55.013515643Z", + "all_results_received": "2025-12-23T09:34:55.073249145Z", + "output_generated": "2025-12-23T09:34:55.073419652Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:55.013329935Z", + "publish_time": "2025-12-23T09:34:55.013515643Z", + "first_worker_start": "2025-12-23T09:34:55.01394926Z", + "last_worker_end": "2025-12-23T09:34:55.0724Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:55.01394706Z", + "start_time": "2025-12-23T09:34:55.014006163Z", + "end_time": "2025-12-23T09:34:55.014057465Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + 
}, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:55.014157Z", + "start_time": "2025-12-23T09:34:55.014314Z", + "end_time": "2025-12-23T09:34:55.0724Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:55.013982462Z", + "start_time": "2025-12-23T09:34:55.014048464Z", + "end_time": "2025-12-23T09:34:55.014119467Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:55.013892558Z", + "start_time": "2025-12-23T09:34:55.01394926Z", + "end_time": "2025-12-23T09:34:55.013972861Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2574, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git 
a/data/output/004ea96265c4172e44eb2465c9d4a9b516a2937c.json b/data/output/004ea96265c4172e44eb2465c9d4a9b516a2937c.json new file mode 100644 index 0000000..4d059f1 --- /dev/null +++ b/data/output/004ea96265c4172e44eb2465c9d4a9b516a2937c.json @@ -0,0 +1,366 @@ +{ + "file_name": "004ea96265c4172e44eb2465c9d4a9b516a2937c.txt", + "total_words": 831, + "top_n_words": [ + { + "word": "in", + "count": 30 + }, + { + "word": "the", + "count": 26 + }, + { + "word": "and", + "count": 23 + }, + { + "word": "to", + "count": 22 + }, + { + "word": "a", + "count": 21 + }, + { + "word": "was", + "count": 17 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "she", + "count": 15 + }, + { + "word": "with", + "count": 14 + }, + { + "word": "her", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "File picture .", + "length": 14 + }, + { + "text": "Father Brenton Emmons said: 'The .", + "length": 34 + }, + { + "text": "High taxes on recognised brands of .", + "length": 36 + }, + { + "text": "Sadly this ended in her tragic death.", + "length": 37 + }, + { + "text": "' Methanol, which is also called methyl .", + "length": 41 + }, + { + "text": "Others in her group also reported feeling ill.", + "length": 46 + }, + { + "text": "Mr Cook survived the alcohol poisoning in April .", + "length": 49 + }, + { + "text": "drinks with a locally brewed substance called arak.", + "length": 51 + }, + { + "text": "and people think they can just sleep it off,' he added .", + "length": 56 + }, + { + "text": "She had lots of friends and had raised money for charity.", + "length": 57 + }, + { + "text": "His fiancee, Michaela Pechac, watched in horror as he died.", + "length": 59 + }, + { + "text": "'That’s what Cheznye tried to do and she died as a result.", + "length": 60 + }, + { + "text": "production of formaldehyde, acetic acid and as a fuel component.", + "length": 64 + }, + { + "text": "She added: 'She was focused on what she wanted to do with her life.", + "length": 67 + }, + { + 
"text": "alcohol, is used in numerous industrial applications, including the .", + "length": 69 + }, + { + "text": "Mother Pamela Emmons described her daughter as 'lively and outgoing'.", + "length": 69 + }, + { + "text": "problem is the symptoms of methanol poisoning are a lot like a hangover .", + "length": 73 + }, + { + "text": "Cheznye was taken to hospital after suffering loss of sight and seizures.", + "length": 73 + }, + { + "text": "Cheznye Emmons, who died after drinking gin while travelling in Indonesia.", + "length": 74 + }, + { + "text": "wine, beer and spirits in Indonesia have prompted shop owners to mix the .", + "length": 74 + }, + { + "text": "Coroner Eleanor McGann concluded Ms Emmons died as a result of misadventure.", + "length": 76 + }, + { + "text": "An 18-year-old boy was also temporarily blinded during a trip to Bali in November.", + "length": 82 + }, + { + "text": "Ms Emmons was on a six-month trip travelling around Thailand, Indonesia and Malaysia.", + "length": 85 + }, + { + "text": "In June last year Swede Johan Lundin, 28, was poisoned by a mojito laced with methanol.", + "length": 87 + }, + { + "text": "She said: 'It is clear that she went off travelling expecting to enjoy life and to learn.", + "length": 89 + }, + { + "text": "She had been staying in the local Rain Forrest Lodge when she fell ill, the inquest heard.", + "length": 90 + }, + { + "text": "'This is being sold in shops, restaurants and bars - places where you might think it is safe.", + "length": 93 + }, + { + "text": "The alcohol had been mixed with methanol and the beautician died in hospital five days later .", + "length": 94 + }, + { + "text": "A British backpacker was fatally poisoned after drinking gin which had been mixed with methanol.", + "length": 96 + }, + { + "text": "Alan Cole, 59, and Rose Johnson, 48, died in separate incidents after consuming traditional arak.", + "length": 97 + }, + { + "text": "But the message is really \"stick to beer\" because otherwise 
you don’t know what risk you are taking.", + "length": 102 + }, + { + "text": "She was rushed to hospital after experiencing sudden blindness and convulsions and died five days later.", + "length": 104 + }, + { + "text": "Some, like in Cheznye's tragic case, are using methanol, which is colourless, to try and keep costs down.", + "length": 105 + }, + { + "text": "Rachel Craig, 22, from Ireland and her boyfriend Rene Puper, 23, who was Dutch, were also among the dead.", + "length": 105 + }, + { + "text": "In February 19 year-old Liam Davies from Perth died after a vodka mixer was laced with methanol in Lombok.", + "length": 106 + }, + { + "text": "In 2009 two Britons were among 25 people to die in Indonesia after drinking palm wine spiked with methanol.", + "length": 107 + }, + { + "text": "In December an Australian schoolgirl was blinded after drinking a tainted cocktail while on a trip to Bali.", + "length": 107 + }, + { + "text": "The 23-year-old beautician was drinking with friends in Bukit Lawang, northern Sumatra, when she was poisoned.", + "length": 110 + }, + { + "text": "'It’s very hard to tell the difference - sometimes there are bits floating in it, sometimes it might smell funny.", + "length": 115 + }, + { + "text": "Beautician Cheznye and her boyfriend, Joe Cook, both drank from the bottle which they had purchased in a local store.", + "length": 117 + }, + { + "text": "' Mr Emmons said: 'In a lot of these countries they mix methanol with spirits and wine without a thought for the dangers.", + "length": 121 + }, + { + "text": "Because of how such deaths are recorded abroad, the family say it is impossible to know how many others have died in the same way.", + "length": 130 + }, + { + "text": "'It was like having a chunk of your heart ripped out but we know that she’d be proud of what we’re doing to try to save lives.", + "length": 130 + }, + { + "text": "She was kept there for five days until her parents, Brenton and Pamela Emmons, were told there was no 
more doctors could do for her .", + "length": 133 + }, + { + "text": "' Now her parents have spoken of their determination to prevent further deaths and prevent other families experiencing such heartbreak.", + "length": 135 + }, + { + "text": "Mr Emmons said that since launching the campaign they have become aware of hundreds of other cases of people falling ill, with some dying.", + "length": 138 + }, + { + "text": "' Cheznye's parents have spoken of their determination to prevent further deaths and prevent other families experiencing such heartbreak .", + "length": 138 + }, + { + "text": "Cheznye Emmons, 23, was travelling across Indonesia with her boyfriend, Joe Cook (right) when the pair became ill after drinking gin which had methanol in it .", + "length": 159 + }, + { + "text": "The family launched their Save a Life Campaign soon after the death and soon hope to distribute 20,000 posters highlighting the dangers to doctors’ surgeries.", + "length": 160 + }, + { + "text": "Today an inquest in Chelmsford heard she had the drink, bought from a shop and with a seemingly legitimate label, with friends in Bukit Lawang, northern Sumatra.", + "length": 161 + }, + { + "text": "Beautician Cheznye Emmons, 23, from Great Wakering, Essex, died after buying the alcohol while travelling in Indonesia with her boyfriend Joe Cook in April last year.", + "length": 166 + }, + { + "text": "Her father says his daughter had not realised the danger she was in because the symptoms of methanol poisoning are similar to a hangover - so victims often attempt to just sleep it off.", + "length": 185 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7285071611404419 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:55.514281084Z", + "first_section_created": "2025-12-23T09:34:55.515627338Z", + "last_section_published": "2025-12-23T09:34:55.515870948Z", + "all_results_received": "2025-12-23T09:34:55.579037489Z", + "output_generated": 
"2025-12-23T09:34:55.579224696Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:55.515627338Z", + "publish_time": "2025-12-23T09:34:55.515870948Z", + "first_worker_start": "2025-12-23T09:34:55.516334267Z", + "last_worker_end": "2025-12-23T09:34:55.578087Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:55.516358168Z", + "start_time": "2025-12-23T09:34:55.516473972Z", + "end_time": "2025-12-23T09:34:55.516560576Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:55.516559Z", + "start_time": "2025-12-23T09:34:55.5167Z", + "end_time": "2025-12-23T09:34:55.578087Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:55.516271464Z", + "start_time": "2025-12-23T09:34:55.516334267Z", + "end_time": "2025-12-23T09:34:55.51641987Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:55.516289565Z", + "start_time": "2025-12-23T09:34:55.516369468Z", + "end_time": "2025-12-23T09:34:55.51641517Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4873, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/004ec2230dc739e4a29f601792c0015418cd71b5.json b/data/output/004ec2230dc739e4a29f601792c0015418cd71b5.json new file mode 100644 index 0000000..822dca6 --- /dev/null +++ b/data/output/004ec2230dc739e4a29f601792c0015418cd71b5.json @@ -0,0 +1,302 @@ +{ + "file_name": "004ec2230dc739e4a29f601792c0015418cd71b5.txt", + "total_words": 685, + "top_n_words": [ + { + "word": "the", + "count": 30 + }, + { + "word": "a", + "count": 26 + }, + { + "word": "to", + "count": 23 + }, + { + "word": "of", + "count": 21 + }, + { + "word": "beetroot", + "count": 15 + }, + { + "word": "and", + "count": 14 + }, + { + "word": "that", + "count": 13 + }, + { + "word": "is", + "count": 11 + }, + { + "word": "it", + "count": 9 + }, + { + "word": "with", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "50 each.", + "length": 8 + }, + { + "text": "5g fat per 100g .", + "length": 17 + }, + { + "text": "Fat-Free with typically less than 0.", + "length": 36 + }, + { + "text": "Low in salt for healthy blood pressure .", + "length": 40 + }, + { + "text": "Source of manganese for healthy bones, energy metabolism .", + "length": 58 + }, + { + "text": "Tired of trying to tempt your children into a plate of greens?", + "length": 62 + }, + { + "text": "Well one supermarket suggests you 
try a more colourful approach.", + "length": 64 + }, + { + "text": "' Both varieties will be sold in 450 gram packs and will cost £1.", + "length": 66 + }, + { + "text": "Low in calories with typically less than 37 calories (kcal) per 100g .", + "length": 70 + }, + { + "text": "'We're hoping that both these varieties will appeal to a whole new audience.", + "length": 76 + }, + { + "text": "A source of folic acid for a healthy immune system and to reduce tiredness .", + "length": 76 + }, + { + "text": "The Candy Stripe Beetroot is said to taste sweeter than the traditional deep purple root .", + "length": 90 + }, + { + "text": "A source of potassium to help maintain a healthy blood pressure, a healthy nervous system .", + "length": 91 + }, + { + "text": "A source of folic acid which supports the development of your baby before and during pregnancy .", + "length": 96 + }, + { + "text": "Beetroot has traditionally suffered from 'marmite syndrome' with the younger generation disliking it .", + "length": 102 + }, + { + "text": "Source of pantothenic acid for energy metabolism, to reduce tiredness, for healthy mental performance .", + "length": 103 + }, + { + "text": "Tesco are launching two new vibrant varieties of beetroot in a hope to encourage children to eat them .", + "length": 103 + }, + { + "text": "Seven slices of fresh or bottled beetroot (or three 'baby' whole beetroots) count as one of your five-a-day .", + "length": 109 + }, + { + "text": "'It looks and tastes absolutely wonderful and will definitely also bring a lot of colour and fun to the dinner table.", + "length": 117 + }, + { + "text": "' Ravi added that he hopes the new take on the unpopular veg will encourage a whole new generation of fans of the root.", + "length": 119 + }, + { + "text": "The supermarket giant will be offering two long-lost varieties, the organic candy stripe beetroot and the golden beetroot.", + "length": 122 + }, + { + "text": "It has seen a resurgence recently, but it is the 
juice variety that is seeing rocketing sales rather than the raw vegetable.", + "length": 124 + }, + { + "text": "Beetroots are known to be a superfood packed with nutritional benefits but they are also notoriously unpopular with children.", + "length": 125 + }, + { + "text": "However, Tesco are hoping to change this when they launch a sweeter version of the root exclusively across UK stores tomorrow.", + "length": 126 + }, + { + "text": "'We were specifically looking for varieties of beetroot that would excite the palates of a new audience of shoppers seeking healthy foods.", + "length": 138 + }, + { + "text": "G's managing director Anthony Gardiner says that these new vegetables should be enough to add a little excitement to children's meal times.", + "length": 139 + }, + { + "text": "Tesco is hoping to give exasperated parents a helping hand in getting their youngsters to eat beetroot with their latest launch, the sweetroot.", + "length": 143 + }, + { + "text": "The golden beetroot is a Victorian variety that also goes by the name of Burpee's golden, after the American seed house where it was first grown.", + "length": 145 + }, + { + "text": "Traditionally beetroot has suffered from a so-called 'Marmite syndrome', with people 50 and above enjoying it and the younger generation disliking it.", + "length": 150 + }, + { + "text": "The candy stripe variety is officially called Chiogga and is named after the northern Italian coastal town where it was first cultivated in the early 1800s.", + "length": 156 + }, + { + "text": "Tesco beetroot buyer Ravi Patel says he understands the struggle that parents go through when it comes to meal times but hopes their Candy Stripe Beetroot can help.", + "length": 164 + }, + { + "text": "The colourful veg is said to have a sweeter flavour than the dark purple original and this combined with the bright appearance is said to encourage children to eat them.", + "length": 169 + }, + { + "text": "'Every parent knows that getting 
youngsters to eat vegetables is never easy but we believe that the Candy Stripe variety will help make that dinner time task a lot simpler.", + "length": 172 + }, + { + "text": "'We know that beetroot is becoming more popular but thought that if we could add a sweeter profile as well as introducing different coloured varieties we could help widen this wonderful vegetable's appeal.", + "length": 205 + }, + { + "text": "' The new varieties have been grown by the UK's biggest producers of beetroot, Gs, based in Ely, Cambridgeshire, which offers perfect growing conditions of soil, sun and water to produce a sweet, full-flavoured root.", + "length": 216 + }, + { + "text": "'We've worked with our main beetroot grower to see how we could encourage more people to enjoy this wonderful vegetable and discovered that this sweeter Candy Stripe variety had generally been forgotten about and never been grown on a commercial scale before.", + "length": 259 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5756925344467163 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:56.017016505Z", + "first_section_created": "2025-12-23T09:34:56.017322917Z", + "last_section_published": "2025-12-23T09:34:56.017528025Z", + "all_results_received": "2025-12-23T09:34:56.078250668Z", + "output_generated": "2025-12-23T09:34:56.078425375Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:56.017322917Z", + "publish_time": "2025-12-23T09:34:56.017528025Z", + "first_worker_start": "2025-12-23T09:34:56.01813765Z", + "last_worker_end": "2025-12-23T09:34:56.077332Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:56.018108649Z", + "start_time": "2025-12-23T09:34:56.018175751Z", + 
"end_time": "2025-12-23T09:34:56.018258755Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:56.018367Z", + "start_time": "2025-12-23T09:34:56.01852Z", + "end_time": "2025-12-23T09:34:56.077332Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:56.018106149Z", + "start_time": "2025-12-23T09:34:56.018173451Z", + "end_time": "2025-12-23T09:34:56.018263455Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:56.018070847Z", + "start_time": "2025-12-23T09:34:56.01813765Z", + "end_time": "2025-12-23T09:34:56.018174751Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4086, + 
"slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/004f0f8c694c4b546b29565a8993a555537ff561.json b/data/output/004f0f8c694c4b546b29565a8993a555537ff561.json new file mode 100644 index 0000000..5c0737d --- /dev/null +++ b/data/output/004f0f8c694c4b546b29565a8993a555537ff561.json @@ -0,0 +1,230 @@ +{ + "file_name": "004f0f8c694c4b546b29565a8993a555537ff561.txt", + "total_words": 280, + "top_n_words": [ + { + "word": "the", + "count": 13 + }, + { + "word": "in", + "count": 7 + }, + { + "word": "or", + "count": 7 + }, + { + "word": "and", + "count": 6 + }, + { + "word": "a", + "count": 5 + }, + { + "word": "of", + "count": 5 + }, + { + "word": "are", + "count": 4 + }, + { + "word": "balls", + "count": 4 + }, + { + "word": "for", + "count": 4 + }, + { + "word": "from", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "com.", + "length": 4 + }, + { + "text": "You can read the full story here.", + "length": 33 + }, + { + "text": "Jessica Hilltout doesn't think so.", + "length": 34 + }, + { + "text": "Read the whole story at Nationalgeographic.", + "length": 43 + }, + { + "text": "Has the \"beautiful game\" ever been lovelier?", + "length": 44 + }, + { + "text": "Share your experiences in the comments section below.", + "length": 53 + }, + { + "text": "Goalposts might be made of gathered mahogany or driftwood.", + "length": 58 + }, + { + "text": "Each might last days or months on a field of gravel or hard earth.", + "length": 66 + }, + { + "text": "Are you a soccer lover who has improvised a game or its implements?", + "length": 67 + }, + { + "text": "Playing fields are arid, lush, weedy, sandy—any flattish space will do.", + "length": 73 + }, + { + "text": "Some feet are bare, others shod in fraying sneakers, boots, rubber sandals.", + "length": 75 + }, + { + "text": "Editor's note: This is an excerpt from the February issue of National Geographic magazine.", + "length": 90 + }, + { + "text": "(National Geographic) -- Miles 
from the main roads, in rural Africa, soccer balls bounce unevenly.", + "length": 98 + }, + { + "text": "Yet children kick and chase handmade, lopsided balls with skill and abandon, competing for pride and joy—for the sheer pleasure of playing.", + "length": 141 + }, + { + "text": "Wherever Hilltout went, she swapped the store-bought balls she kept in her car for these \"ingenious little jewels,\" most of which were made by children.", + "length": 152 + }, + { + "text": "In 2010, when the World Cup came to Africa for the first time, the Belgium-based photographer set out to see what soccer looked like far from the bright lights and big stadiums.", + "length": 177 + }, + { + "text": "\" In the 30-odd soccer-loving localities she visited, in countries from South Africa to Ivory Coast, balls are spun into being with whatever's at hand: rag or sock, tire or bark, plastic bag or inflated condom.", + "length": 210 + }, + { + "text": "What she found—over seven months, ten countries, and 12,500 miles—was a grassroots game where passion trumped poverty, a do-it-yourself ethic prospered, and one ball could \"bring happiness to an entire village.", + "length": 214 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.497285395860672 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:56.518291667Z", + "first_section_created": "2025-12-23T09:34:56.518571378Z", + "last_section_published": "2025-12-23T09:34:56.518753385Z", + "all_results_received": "2025-12-23T09:34:56.588814703Z", + "output_generated": "2025-12-23T09:34:56.588964109Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 70, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:56.518571378Z", + "publish_time": "2025-12-23T09:34:56.518753385Z", + "first_worker_start": "2025-12-23T09:34:56.519250105Z", + "last_worker_end": "2025-12-23T09:34:56.586699Z", + 
"total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:56.519287607Z", + "start_time": "2025-12-23T09:34:56.519343009Z", + "end_time": "2025-12-23T09:34:56.51937771Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:56.519444Z", + "start_time": "2025-12-23T09:34:56.519595Z", + "end_time": "2025-12-23T09:34:56.586699Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:56.519189703Z", + "start_time": "2025-12-23T09:34:56.519250105Z", + "end_time": "2025-12-23T09:34:56.519296707Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:56.519208404Z", + "start_time": "2025-12-23T09:34:56.519250105Z", + "end_time": "2025-12-23T09:34:56.519274606Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1647, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/004f9d05655414cdf2a5133614d8d1dbaff3dcd0.json b/data/output/004f9d05655414cdf2a5133614d8d1dbaff3dcd0.json new file mode 100644 index 0000000..9270d28 --- /dev/null +++ b/data/output/004f9d05655414cdf2a5133614d8d1dbaff3dcd0.json @@ -0,0 +1,620 @@ +{ + "file_name": "004f9d05655414cdf2a5133614d8d1dbaff3dcd0.txt", + "total_words": 1188, + "top_n_words": [ + { + "word": "the", + "count": 47 + }, + { + "word": "a", + "count": 43 + }, + { + "word": "and", + "count": 34 + }, + { + "word": "to", + "count": 27 + }, + { + "word": "in", + "count": 21 + }, + { + "word": "garrett", + "count": 18 + }, + { + "word": "his", + "count": 17 + }, + { + "word": "is", + "count": 16 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "i", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "6.", + "length": 2 + }, + { + "text": "5.", + "length": 2 + }, + { + "text": "7.", + "length": 2 + }, + { + "text": "3.", + "length": 2 + }, + { + "text": "4.", + "length": 2 + }, + { + "text": "2.", + "length": 2 + }, + { + "text": "8.", + "length": 2 + }, + { + "text": "9.", + "length": 2 + }, + { + "text": "1.", + "length": 2 + }, + { + "text": "10.", + "length": 3 + }, + { + "text": "' They .", + "length": 8 + }, + { + "text": "' They .", + "length": 8 + }, + { + "text": "and caves.", + "length": 10 + }, + { + "text": "materials.", + "length": 10 + }, + { + "text": "and caves.", + "length": 10 + }, + { + "text": "materials.", + "length": 10 + }, + { + "text": "Players are .", + "length": 13 + }, + { + "text": "Players are .", + "length": 13 + }, + { + "text": "the landscape.", + "length": 14 + }, + { + "text": "the landscape.", + "length": 14 + }, + { + "text": "Spinninrec: 
145.", + "length": 16 + }, + { + "text": "clips to YouTube.", + "length": 17 + }, + { + "text": "Katy Perry VEVO: 150.", + "length": 21 + }, + { + "text": "DisneyCollectorBR: 175.", + "length": 23 + }, + { + "text": "As a player progresses .", + "length": 24 + }, + { + "text": "As a player progresses .", + "length": 24 + }, + { + "text": "8 million monthly views .", + "length": 25 + }, + { + "text": "7 million monthly views .", + "length": 25 + }, + { + "text": "7 million monthly views .", + "length": 25 + }, + { + "text": "6 million monthly views .", + "length": 25 + }, + { + "text": "7 million monthly views .", + "length": 25 + }, + { + "text": "' Scroll down for video .", + "length": 25 + }, + { + "text": "Minecraft launched in 2009.", + "length": 27 + }, + { + "text": "Minecraft was created in 2009.", + "length": 30 + }, + { + "text": "' Minecraft was created in 2009.", + "length": 32 + }, + { + "text": "Zefr's MovieClips Trailers: 154.", + "length": 32 + }, + { + "text": "His channel now has more than 1.", + "length": 32 + }, + { + "text": "Muyap: 144 million monthly views .", + "length": 34 + }, + { + "text": "WatchMojo: 119 million monthly views .", + "length": 38 + }, + { + "text": "Eminem VEVO: 141 million monthly views .", + "length": 40 + }, + { + "text": "'My family and friends think it’s great.", + "length": 42 + }, + { + "text": "Stampylonghead: 152 million monthly views .", + "length": 43 + }, + { + "text": "Felix Arvid Ulf Kjellberg's PewDiePie: 266.", + "length": 43 + }, + { + "text": "I think I’ve got the best job in the world.", + "length": 45 + }, + { + "text": "Miley Cyrus VEVO: 123 million monthly views .", + "length": 45 + }, + { + "text": "Garrett's main audience is six to 14-year-olds.", + "length": 47 + }, + { + "text": "given blocks and tools to build towns and cities.", + "length": 49 + }, + { + "text": "given blocks and tools to build towns and cities.", + "length": 49 + }, + { + "text": "I don’t think how huge this is has really sunk 
in.", + "length": 52 + }, + { + "text": "He makes money pocketing a share of the advertising .", + "length": 53 + }, + { + "text": "' They can then walk around and build towns and cities.", + "length": 55 + }, + { + "text": "Players can also fly up in the air for a birds-eye view of .", + "length": 60 + }, + { + "text": "Players can also fly up in the air for a birds-eye view of .", + "length": 60 + }, + { + "text": "' The former barman’s main audience is six to 14-year-olds.", + "length": 61 + }, + { + "text": "The top 10 most viewed YouTube channels, as of January 2014 .", + "length": 61 + }, + { + "text": "they can earn advanced tools and building blocks in different .", + "length": 63 + }, + { + "text": "they can earn advanced tools and building blocks in different .", + "length": 63 + }, + { + "text": "Garrett's father came up with his Stampy Cat design, pictured .", + "length": 63 + }, + { + "text": "Garrett began playing Minecraft and uploading videos 18 months ago.", + "length": 67 + }, + { + "text": "He also has a daily meeting with his network, based in Los Angeles.", + "length": 67 + }, + { + "text": "Garrett began playing Minecraft and uploading his videos 18 months ago.", + "length": 71 + }, + { + "text": "can then walk around different terrains, including mountains, forests .", + "length": 71 + }, + { + "text": "can then walk around different terrains, including mountains, forests .", + "length": 71 + }, + { + "text": "'When those videos started to become popular, I started monetising them.", + "length": 72 + }, + { + "text": "Garrett's father, a graphic designer, came up with his Stampy Cat design.", + "length": 73 + }, + { + "text": "He also has a daily conference call with his network, based in Los Angeles.", + "length": 75 + }, + { + "text": "'It is hard to make a living because you get a small cut,' explained Garrett.", + "length": 77 + }, + { + "text": "At the start of the game, a player is put into a 'virtually infinite game world.", + 
"length": 80 + }, + { + "text": "At the start of the game, a player is put into a 'virtually infinite game world.", + "length": 80 + }, + { + "text": "At the start of the game, a player is put into a 'virtually infinite game world.", + "length": 80 + }, + { + "text": "He currently uploads at least one 20-minute video a day using his bedroom studio.", + "length": 81 + }, + { + "text": "Garrett is now sent, on average, 3,000 messages a day from gamers asking for tips .", + "length": 83 + }, + { + "text": "You are essentially just playing and commentating while you play, just as a fun hobby.", + "length": 86 + }, + { + "text": "That started to grow and ended up snowballing and that’s how I got to where I am now.", + "length": 87 + }, + { + "text": "'When I was at university I switched over to doing what I do now, called a 'let's play'.", + "length": 88 + }, + { + "text": "Joseph Garrett, 23, earns a living by filming himself playing Minecraft and uploading the .", + "length": 91 + }, + { + "text": "Joseph Garrett, pictured, also known as Stampylonghead has a degree in TV and video production.", + "length": 95 + }, + { + "text": "They don’t all fully understand what I do but my parents and sister are over the moon for me.", + "length": 95 + }, + { + "text": "The game was initially made for the PC but there are now Xbox 360 and mobile versions available.", + "length": 96 + }, + { + "text": "The game was initially made for the PC but there are now Xbox 360 and mobile versions available.", + "length": 96 + }, + { + "text": "The single entrepreneur gets a percentage of the money from videos with an advert in front of them.", + "length": 99 + }, + { + "text": "'I’m in the top 10 YouTube channels worldwide at the moment with 160 to 170 million views a month.", + "length": 100 + }, + { + "text": "He spends around an hour recording every day, with another 10 hours editing and managing the business.", + "length": 102 + }, + { + "text": "Figures are worked out by CPM (cost per 
1,000 ad views), and the average CPM is worth around $7 (£4).", + "length": 102 + }, + { + "text": "He spends around an hour recording every day, with another 10 hours editing and managing the business.", + "length": 102 + }, + { + "text": "It’s hard to imagine a crowd of people that huge but maybe one day it’s going to eventually sink in.", + "length": 104 + }, + { + "text": "The game was initially made for the PC but there are now Xbox 360 and mobile versions available, pictured .", + "length": 107 + }, + { + "text": "Garrett, who has a degree in TV and video production, currently uploads at least one 20-minute video a day.", + "length": 107 + }, + { + "text": "The single entrepreneur, pictured, gets a percentage of the money from videos with an advert in front of them.", + "length": 110 + }, + { + "text": "Google takes around 50 per cent and the network who sources the adverts takes a share before Garrett takes a cut.", + "length": 113 + }, + { + "text": "'You need to be getting the massive figures, but if you do manage to get right to the top, there is big money there.", + "length": 116 + }, + { + "text": "Forget Justin Bieber, the latest YouTube star is a university graduate from Portsmouth who plays Minecraft full-time.", + "length": 117 + }, + { + "text": "He makes money pocketing a share of the advertising revenue from his videos and is looking to buy an official studio .", + "length": 118 + }, + { + "text": "When his channel Stampylonghead reached 10,000 subscribers he quit his job at a local pub to manage the channel full time.", + "length": 122 + }, + { + "text": "Google takes around 50 per cent, Garrett said, then the network who source the adverts takes a share before he takes a cut.", + "length": 123 + }, + { + "text": "'When you’re in your room all day looking at little numbers tick up, I think it’s hard to gauge it,' continued Garrett.", + "length": 123 + }, + { + "text": "When his channel Stampylonghead, pictured, reached 10,000 subscribers he quit 
his job at a local pub to manage it full time.", + "length": 124 + }, + { + "text": "Garrett said he decided to leave his job when he started earning the same amount in advertising revenue as he was at the pub.", + "length": 125 + }, + { + "text": "Garrett is now sent, on average, 3,000 messages a day from gamers who watch videos of his character Stampy Cat asking for tips.", + "length": 127 + }, + { + "text": "revenue from his videos, called ‘let's play’ clips, and is looking to buy a studio with a friend who also appears in his videos.", + "length": 132 + }, + { + "text": "Joseph Garrett, 23, pictured, earns a living by filming himself playing Minecraft from his bedroom before uploading the clips to YouTube .", + "length": 138 + }, + { + "text": "'I was lucky enough to be able to live with my parents and they were happy to let me stay rent free so I could develop this into a full-time living.", + "length": 148 + }, + { + "text": "' Garrett added that viewing numbers change regularly, but estimates channels with more than 100,000 subscribers generate enough cash for a decent living.", + "length": 154 + }, + { + "text": "' Garrett has also hinted towards expanding the business offline: 'Outside YouTube there’s merchandising and big branding deals you can get into to make more money.", + "length": 166 + }, + { + "text": "9 million subscribers, receives in excess of 5 million hits a day and is voted one of the top 10 most viewed channels worldwide - pulling more hits than One Direction and Justin Bieber.", + "length": 185 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5155590623617172 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:57.019515926Z", + "first_section_created": "2025-12-23T09:34:57.021214495Z", + "last_section_published": "2025-12-23T09:34:57.02158631Z", + "all_results_received": "2025-12-23T09:34:57.102682772Z", + "output_generated": "2025-12-23T09:34:57.102841978Z", + 
"total_processing_time_ms": 83, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 81, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:57.021214495Z", + "publish_time": "2025-12-23T09:34:57.021460705Z", + "first_worker_start": "2025-12-23T09:34:57.02208373Z", + "last_worker_end": "2025-12-23T09:34:57.10185Z", + "total_journey_time_ms": 80, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:57.022060229Z", + "start_time": "2025-12-23T09:34:57.022142132Z", + "end_time": "2025-12-23T09:34:57.022273237Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:57.022099Z", + "start_time": "2025-12-23T09:34:57.022239Z", + "end_time": "2025-12-23T09:34:57.10185Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 79 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:57.022102531Z", + "start_time": "2025-12-23T09:34:57.022154033Z", + "end_time": "2025-12-23T09:34:57.022262237Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:57.021989626Z", + "start_time": "2025-12-23T09:34:57.02208373Z", + "end_time": "2025-12-23T09:34:57.022129732Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:57.021496406Z", + "publish_time": "2025-12-23T09:34:57.02158631Z", + "first_worker_start": "2025-12-23T09:34:57.022074729Z", + "last_worker_end": "2025-12-23T09:34:57.095288Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:57.022110231Z", + "start_time": "2025-12-23T09:34:57.022143632Z", + "end_time": "2025-12-23T09:34:57.022185134Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:57.02216Z", + "start_time": "2025-12-23T09:34:57.022296Z", + "end_time": "2025-12-23T09:34:57.095288Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:57.02209673Z", + "start_time": "2025-12-23T09:34:57.022157333Z", + "end_time": "2025-12-23T09:34:57.022197134Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:57.022030728Z", + "start_time": "2025-12-23T09:34:57.022074729Z", + "end_time": "2025-12-23T09:34:57.02209593Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 151, + "min_processing_ms": 72, + "max_processing_ms": 79, + "avg_processing_ms": 75, + "median_processing_ms": 79, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3328, + "slowest_section_id": 0, + "slowest_section_time_ms": 80 + } +} diff --git 
a/data/output/004faacbb234f24c8079cb2a4a760a6f889d0274.json b/data/output/004faacbb234f24c8079cb2a4a760a6f889d0274.json new file mode 100644 index 0000000..538dcda --- /dev/null +++ b/data/output/004faacbb234f24c8079cb2a4a760a6f889d0274.json @@ -0,0 +1,376 @@ +{ + "file_name": "004faacbb234f24c8079cb2a4a760a6f889d0274.txt", + "total_words": 1153, + "top_n_words": [ + { + "word": "the", + "count": 55 + }, + { + "word": "of", + "count": 30 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "credlin", + "count": 23 + }, + { + "word": "to", + "count": 23 + }, + { + "word": "s", + "count": 22 + }, + { + "word": "a", + "count": 20 + }, + { + "word": "abbott", + "count": 18 + }, + { + "word": "he", + "count": 16 + }, + { + "word": "in", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": ".", + "length": 1 + }, + { + "text": ".", + "length": 1 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Candace Sutton .", + "length": 16 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "'You should deal with issues, not with personalities.", + "length": 53 + }, + { + "text": "'People expect better of members of parliament,' he said.", + "length": 57 + }, + { + "text": "Mr Palmer said he was not aware of Ms Credlin's personal struggles.", + "length": 67 + }, + { + "text": "'I think policies should be formulated from the party room,' he said.", + "length": 69 + }, + { + "text": "' 'I think policies should be formulated from the party room,' he said.", + "length": 71 + }, + { + "text": "But for us, it was a really interesting insight into their relationship.", + "length": 72 + }, + { + "text": "He said he had made the comments because they concerned government policy.", + "length": 74 + }, + { + "text": "' Liberal MP Dennis Jensen said staffers were not 'fair game' in parliament.", + "length": 76 + }, + { + "text": "'She gave him a swift kick in the shin,' Riley said, 'It was done with good humour.", + "length": 83 + }, + { + 
"text": "But the 43-year-old is widely regarded as the 'power' and the 'brains' behind Tony Abbott .", + "length": 91 + }, + { + "text": "'For him to single her out is a particularly hurtful thing for him to do,' she told Sky News.", + "length": 93 + }, + { + "text": "it was done with good humour, but for us, it was a really interesting insight into their relationship.", + "length": 102 + }, + { + "text": "Dubbed 'Ms Fix-it', Credlin was 'the most discussed woman in Canberra, and probably the most powerful'.", + "length": 103 + }, + { + "text": "'It shouldn't come down from Tony Abbott's office telling elected members of parliament what they will do.", + "length": 106 + }, + { + "text": "Foreign Minister Julie Bishop said she hoped Mr Palmer reflected on his 'hurtful' comments about Ms Credlin.", + "length": 108 + }, + { + "text": "Thr right stuff: The tall, glamorous and highly intelligent Ms Credlin is the most discussed woman in Canberra.", + "length": 111 + }, + { + "text": "Ms Credlin, 43, who has been married to Liberal Party federal director Brian Loughnane, since 2001, has been on an IVF programme.", + "length": 129 + }, + { + "text": "'It was unnecessary for Mr Palmer to be so personally vindictive in the comments he made,' he told reporters, 'he should apologise.", + "length": 131 + }, + { + "text": "'The people who to a certain extent are fair game are us as politicians, but once again it shouldn't get down to personal invective,' Dr Jensen said.", + "length": 149 + }, + { + "text": "Coalition politicians on Tuesday called Mr Palmer's comments 'sexist' and 'hurtful' because Ms Credlin has been struggling to conceive her first child.", + "length": 152 + }, + { + "text": "Struggle to conceive: Ms Credlin, 43, who has been married to Liberal Party federal director Brian Loughnane, since 2001, last year revealed their battle to have children .", + "length": 172 + }, + { + "text": "of Mr Abbott and Ms Credlin on the way back from an overseas trip in 2013 when 
the prime minister made some comments Ms Credlin thought were 'just a little bit out of school'.", + "length": 175 + }, + { + "text": "' A senior Liberal told Good Weekend last year that Mr Abbott, 'relies on her heavily and respects her judgment' and that 'she is the person in politics he's closest to, no doubt about that.", + "length": 190 + }, + { + "text": "Nationals MP Darren Chester said he was disgusted with Mr Palmer's comments, and worried about the tone of parliamentary debate when unelected staffers were drawn in and 'personally vilified'.", + "length": 192 + }, + { + "text": "' But the government's frontbencher Christopher Pyne has described Mr Palmer's comments as 'cowardly' and 'ignorant', saying the paid parental leave scheme was proposed before Ms Credlin worked for Mr Abbott.", + "length": 208 + }, + { + "text": "Time to question: MP Clive Palmer's criticism of Peta Credlin (pictured with Tony Abbott during question time in the House of Representatives) was that the influence she wields should not supersede that of MPs.", + "length": 210 + }, + { + "text": "Politically close: Prime Minister Tony Abbott (pictured with Peta Credlin) is politically closest to his powerful chief of staff, who even members of his own party say he relies on heavily and respects her judgment .", + "length": 216 + }, + { + "text": "Government MPs were quick to condemn Mr Palmer's comment, however Ms Credlin has previously been credited with being the 'power' and the 'brains' behind the prime minister and the person he is 'closest to politically'.", + "length": 218 + }, + { + "text": "Glamorous and influential: Peta Credlin, who posed for this shot in the January edition of Marie Claire last year, is the best known woman in Canberra and has a powerful political rapport with Prime minister Tony Abbott .", + "length": 221 + }, + { + "text": "' Nationals MP Andrew Broad, who tweeted his disapproval of the 'gutter politics' after Mr Palmer made the statement, added the comments 
were hurtful for couples like he and his wife who had also struggled with fertility.", + "length": 221 + }, + { + "text": "Billionaire MP Clive Palmer is standing by his criticism of the Prime Minister's chief of staff  Peta Credlin, who Palmer says has 'undue influence' over Tony Abbott and who many regard as 'the brains' and the 'power' behind the PM.", + "length": 233 + }, + { + "text": "Calling the shots: Peta Credlin's influence on Prime Minister Tony Abbott is such that journalists say when Mr Abbott made some comments Ms Credlin thought were 'just a little bit out of school' she 'gave him a swift kick in the shin.", + "length": 234 + }, + { + "text": "The normally private Ms Credlin came under fire then from the Labor Party, which claimed a member of Mr Abbott's personal staff should not use a public forum to overcome Mr Abbott's perceived popularity problem at the time with women voters.", + "length": 241 + }, + { + "text": "She told marie claire magazine in January last year the prime minister was deeply supportive of her efforts to have a child through IVF with her husband and said Mr Abbott’s views on abortion, contraception and IVF were far more balanced than many people believe.", + "length": 265 + }, + { + "text": "Won't back down: MP and mining magnate Clive Palmer has refused to apologise to Tony Abbott staffer Peta Credlin following comments he made during a parliamentary debate that she had 'undue influence' over government policy and was behind the paid parental leave scheme .", + "length": 271 + }, + { + "text": "Palmer, the MP for Fairfax in Queensland, attacked Ms Credlin in parliamentary debate on Monday, saying the Federal Government had designed the paid parental leave scheme 'just so that the Prime Minister's chief-of-staff can receive a massive benefit when she gets pregnant'.", + "length": 275 + }, + { + "text": "The 60-year-old mining magnate denied he was being sexist, telling reporters in Canberra, 'I believe as chief of staff, 
regardless of whether she is a woman or man, she exercises undue influence on government policy to the detriment of many of the elected members of parliament'.", + "length": 279 + }, + { + "text": "'It shouldn't come down from Tony Abbott's office telling elected members of parliament what they will do' Power behind the throne: Billionaire MP Clive Palmer's claims the government's paid parental leave scheme was designed 'just so that the Prime Minister's chief-of-staff (Peta Credlin, pictured) can receive a massive benefit when she gets pregnant' have been widely condemned as insensitive to Ms Credlin who has struggled to conceive on an IVF programme.", + "length": 461 + }, + { + "text": "Clive Palmer's comments echo previous assessments from within Tony Abbott's own party that she is 'widely believed to have more influence over the way the country is run than most' MPs, who she describes as 'the front men' Canberra correspondents, a member of Mr Abbott's own party and a senior official told Good Weekend last year Ms Credlin had more influence than many MPs and her role was more like the chief of staff of a US president, 'with cabinet-level status'.", + "length": 469 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5109208822250366 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:57.522340751Z", + "first_section_created": "2025-12-23T09:34:57.524096721Z", + "last_section_published": "2025-12-23T09:34:57.524542439Z", + "all_results_received": "2025-12-23T09:34:57.6826797Z", + "output_generated": "2025-12-23T09:34:57.682853007Z", + "total_processing_time_ms": 160, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 158, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:57.524096721Z", + "publish_time": "2025-12-23T09:34:57.524391333Z", + "first_worker_start": "2025-12-23T09:34:57.524991257Z", + "last_worker_end": 
"2025-12-23T09:34:57.681783Z", + "total_journey_time_ms": 157, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:57.525078961Z", + "start_time": "2025-12-23T09:34:57.525151064Z", + "end_time": "2025-12-23T09:34:57.525286469Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:57.525227Z", + "start_time": "2025-12-23T09:34:57.525411Z", + "end_time": "2025-12-23T09:34:57.681783Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 156 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:57.524947156Z", + "start_time": "2025-12-23T09:34:57.525017458Z", + "end_time": "2025-12-23T09:34:57.525109762Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:57.524932655Z", + "start_time": "2025-12-23T09:34:57.524991257Z", + "end_time": "2025-12-23T09:34:57.525039359Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:34:57.524430935Z", + "publish_time": "2025-12-23T09:34:57.524542439Z", + "first_worker_start": "2025-12-23T09:34:57.525025659Z", + "last_worker_end": "2025-12-23T09:34:57.611377Z", + "total_journey_time_ms": 86, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:57.525075861Z", + "start_time": "2025-12-23T09:34:57.525174465Z", + "end_time": "2025-12-23T09:34:57.525204966Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:57.525301Z", + "start_time": "2025-12-23T09:34:57.525416Z", + "end_time": "2025-12-23T09:34:57.611377Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 85 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:57.525077161Z", + 
"start_time": "2025-12-23T09:34:57.525123863Z", + "end_time": "2025-12-23T09:34:57.525166364Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:57.524998658Z", + "start_time": "2025-12-23T09:34:57.525025659Z", + "end_time": "2025-12-23T09:34:57.525040659Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 241, + "min_processing_ms": 85, + "max_processing_ms": 156, + "avg_processing_ms": 120, + "median_processing_ms": 156, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3399, + "slowest_section_id": 0, + "slowest_section_time_ms": 157 + } +} diff --git a/data/output/004fc12e7cd2505a013d96e816afae3f3ce5015d.json b/data/output/004fc12e7cd2505a013d96e816afae3f3ce5015d.json new file mode 100644 index 0000000..d17430c --- /dev/null +++ b/data/output/004fc12e7cd2505a013d96e816afae3f3ce5015d.json @@ -0,0 +1,214 @@ +{ + "file_name": "004fc12e7cd2505a013d96e816afae3f3ce5015d.txt", + "total_words": 342, + "top_n_words": 
[ + { + "word": "the", + "count": 36 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "indian", + "count": 9 + }, + { + "word": "olympic", + "count": 9 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "a", + "count": 7 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "that", + "count": 7 + }, + { + "word": "ioc", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "India responded positively to the announcement.", + "length": 47 + }, + { + "text": "CNN's Harmeet Shah Singh and Khushbu Shah contributed to this report.", + "length": 69 + }, + { + "text": "At the opening ceremony last week, India's delegation of three athletes had marched under an IOC flag.", + "length": 102 + }, + { + "text": "The reinstatement raises to 89 the number of countries and territories participating in the Sochi Games.", + "length": 104 + }, + { + "text": "The situation remained deadlocked for months, as the Indian association refused to bow to the IOC's demands for changes.", + "length": 120 + }, + { + "text": "\" Narayna Ramachandran, the president of the World Squash Federation, was voted in as the new president of the Indian association.", + "length": 130 + }, + { + "text": "But the IOC said Tuesday that it had ended the suspension following the Indian association's general assembly and elections for a new board on Sunday.", + "length": 150 + }, + { + "text": "(CNN) -- The International Olympic Committee has reinstated India, allowing its athletes to once again compete under their country's flag after a ban of more than a year.", + "length": 170 + }, + { + "text": "\"It is the first time in Olympic history that a suspension of an NOC (National Olympic Committee) has been lifted during an Olympic Games,\" the IOC said in a statement Tuesday.", + "length": 176 + }, + { + "text": "India was suspended from the Olympic fold in December 2012 after the Indian Olympic Association elected Lalit Bhanot, who spent 11 months 
in jail on corruption charges, to a top post.", + "length": 183 + }, + { + "text": "An IOC delegation that observed the elections reported that they complied with the requirement that \"no person convicted or charge-framed can run for a position within the organization.", + "length": 185 + }, + { + "text": "\"To symbolically mark the lifting of the suspension and in recognition of the three Indian athletes competing in Sochi, the Indian flag will be raised in the Olympic Village,\" the IOC said Tuesday.", + "length": 197 + }, + { + "text": "The decision means that Indian athletes can now compete for India's Olympic committee and walk behind their national flag at the closing ceremony of the Winter Games in Sochi on 23 February, the statement said.", + "length": 210 + }, + { + "text": "\"I am happy that suspension is over and now Indian teams and players will take part in the International events under the national flag,\" said Vijay Kumar Malhotra, the former acting president of Indian Olympic Association.", + "length": 223 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.461963415145874 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:58.02530308Z", + "first_section_created": "2025-12-23T09:34:58.025596192Z", + "last_section_published": "2025-12-23T09:34:58.025775299Z", + "all_results_received": "2025-12-23T09:34:58.083786833Z", + "output_generated": "2025-12-23T09:34:58.083916338Z", + "total_processing_time_ms": 58, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:58.025596192Z", + "publish_time": "2025-12-23T09:34:58.025775299Z", + "first_worker_start": "2025-12-23T09:34:58.026218517Z", + "last_worker_end": "2025-12-23T09:34:58.082933Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": 
"2025-12-23T09:34:58.026195516Z", + "start_time": "2025-12-23T09:34:58.026253119Z", + "end_time": "2025-12-23T09:34:58.02628792Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:58.02645Z", + "start_time": "2025-12-23T09:34:58.0266Z", + "end_time": "2025-12-23T09:34:58.082933Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:58.026172815Z", + "start_time": "2025-12-23T09:34:58.026218517Z", + "end_time": "2025-12-23T09:34:58.02627592Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:58.026184616Z", + "start_time": "2025-12-23T09:34:58.026252819Z", + "end_time": "2025-12-23T09:34:58.026267619Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2078, + "slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/005018d0d086f4566f11e11f340b2fadb88db369.json b/data/output/005018d0d086f4566f11e11f340b2fadb88db369.json new file mode 100644 index 0000000..fa43500 --- /dev/null +++ b/data/output/005018d0d086f4566f11e11f340b2fadb88db369.json @@ -0,0 +1,330 @@ +{ + "file_name": "005018d0d086f4566f11e11f340b2fadb88db369.txt", + "total_words": 667, + "top_n_words": [ + { + "word": "the", + "count": 40 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "it", + "count": 14 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "was", + "count": 12 + }, + { + "word": "i", + "count": 11 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "only", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Mark Duell .", + "length": 12 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "It was my baby.", + "length": 15 + }, + { + "text": "10:52 EST, 11 July 2013 .", + "length": 25 + }, + { + "text": "13:24 EST, 11 July 2013 .", + "length": 25 + }, + { + "text": "He has owned the venue for 23 years.", + "length": 36 + }, + { + "text": "‘I won’t be able to get another one.", + "length": 40 + }, + { + "text": "‘It only ever did 200 miles a year max.", + "length": 41 + }, + { + "text": "I had spent hundreds of pounds renovating it.", + "length": 45 + }, + { + "text": "’ Mr Waterson’s van was a familiar sight .", + "length": 46 + }, + { + "text": "I’m absolutely distraught' Michael Waterson .", + "length": 47 + }, + { + "text": "My hobby has always been with cars and I love Only Fools.", + "length": 57 + }, + { + "text": "’ The British-built Reliant Robin was produced from 1973 to 
2002.", + "length": 67 + }, + { + "text": "They are so hard to get hold of - that’s why I kept it for so long.", + "length": 69 + }, + { + "text": "'There’s absolutely nothing left at all except the chassis and two seats.", + "length": 75 + }, + { + "text": "‘There’s absolutely nothing left at all except the chassis and two seats.", + "length": 77 + }, + { + "text": "It wasn’t the nicest van to drive, and they had an awful image with reliability.", + "length": 82 + }, + { + "text": "'I’m absolutely distraught and now I’m desperately looking to find another one.", + "length": 83 + }, + { + "text": "Fire crews put out the blaze and left the sorry remains of the chassis and two seats.", + "length": 85 + }, + { + "text": "He added: ‘Many people parked outside my pub to have photographs taken with the car.", + "length": 86 + }, + { + "text": "Fire crews put out the blaze and left the sorry remains of the chassis and two seats .", + "length": 86 + }, + { + "text": "I had it for 20 years but only ever took it out on special occasions to promote the pub.", + "length": 88 + }, + { + "text": "The whole thing happened so quickly and I knew straight away that something was not right.", + "length": 90 + }, + { + "text": "by Sir David Jason, and his hopeless brother Rodney Trotter, played by Nicholas Lyndhurst.", + "length": 90 + }, + { + "text": "Mr Waterson, who paid £700 for the van, said: ‘It is very sad - it was a genuine supervan.", + "length": 93 + }, + { + "text": "A North Yorkshire Police spokesman said the circumstances around the fire were being investigated.", + "length": 98 + }, + { + "text": "It’s the sort of drama that could have quite easily come straight from the comedy classic itself.", + "length": 99 + }, + { + "text": "Only Fools and Horses ran from 1981 to 2003 and featured the many failed ventures of Del Boy, played .", + "length": 102 + }, + { + "text": "Smoking: It's the sort of drama that could have quite easily come straight from the comedy classic 
itself .", + "length": 107 + }, + { + "text": "outside the bar, which is full of other memorabilia from Only Fools and Horses, the long-running BBC comedy.", + "length": 108 + }, + { + "text": "A lot of people have asked me what’s happened to it and when I’ve told them they have all been devastated.", + "length": 110 + }, + { + "text": "He got out just as smoke started billowing from the engine and watched helplessly as his van went up in flames.", + "length": 111 + }, + { + "text": "The model also appeared in Mr Bean and its notorious instability was the subject of a feature on BBC show Top Gear in 2010.", + "length": 123 + }, + { + "text": "All that's left: He got out just as smoke started billowing from the engine and watched helplessly as his van went up in flames.", + "length": 128 + }, + { + "text": "A Reliant Robin van went up in flames while it was being driven by Only Fools and Horses fan Michael Waterson, who had owned a replica of the famous yellow three-wheeler for 20 years.", + "length": 183 + }, + { + "text": "The 65-year-old used the vehicle to promote his Trotters Bar - but smelled petrol as he was driving along in Harrogate, North Yorkshire, before the car spluttered to a halt and stopped traffic.", + "length": 193 + }, + { + "text": "Burning: A Reliant Robin van went up in flames while it was being driven by Only Fools and Horses fan Michael Waterson, who had owned a replica of the famous yellow three-wheeler for 20 years .", + "length": 193 + }, + { + "text": "Classic show: Uncle Albert (Buster Merryfield), Del Boy (Sir David Jason) and Rodney (Nicholas Lyndhurst) pose with the original Trotters Independent Trading Reliant Robin van in Only Fools and Horses .", + "length": 202 + }, + { + "text": "'I'm distraught': Mr Waterson said he was 'very sad' as the Reliant Robin vehicle was a 'genuine supervan' Response: North Yorkshire Police said the circumstances around the fire were being investigated .", + "length": 204 + }, + { + "text": "Up in 
flames: The 65-year-old used the vehicle to promote his Trotters Bar - but smelled petrol as he was driving along in Harrogate, North Yorkshire, before the car spluttered to a halt and stopped traffic .", + "length": 208 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4226994216442108 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:58.52652294Z", + "first_section_created": "2025-12-23T09:34:58.526873454Z", + "last_section_published": "2025-12-23T09:34:58.527112164Z", + "all_results_received": "2025-12-23T09:34:58.599274366Z", + "output_generated": "2025-12-23T09:34:58.599403071Z", + "total_processing_time_ms": 72, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 72, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:58.526873454Z", + "publish_time": "2025-12-23T09:34:58.527112164Z", + "first_worker_start": "2025-12-23T09:34:58.527530681Z", + "last_worker_end": "2025-12-23T09:34:58.596693Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:58.527542281Z", + "start_time": "2025-12-23T09:34:58.527621184Z", + "end_time": "2025-12-23T09:34:58.527706088Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:58.527762Z", + "start_time": "2025-12-23T09:34:58.527904Z", + "end_time": "2025-12-23T09:34:58.596693Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 68 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:58.527553882Z", + "start_time": "2025-12-23T09:34:58.527627785Z", + "end_time": "2025-12-23T09:34:58.527709088Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:58.527466778Z", + "start_time": 
"2025-12-23T09:34:58.527530681Z", + "end_time": "2025-12-23T09:34:58.527560182Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 68, + "min_processing_ms": 68, + "max_processing_ms": 68, + "avg_processing_ms": 68, + "median_processing_ms": 68, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3698, + "slowest_section_id": 0, + "slowest_section_time_ms": 69 + } +} diff --git a/data/output/00504275ede73591d94a6c1f994fd4856610421c.json b/data/output/00504275ede73591d94a6c1f994fd4856610421c.json new file mode 100644 index 0000000..39708ae --- /dev/null +++ b/data/output/00504275ede73591d94a6c1f994fd4856610421c.json @@ -0,0 +1,330 @@ +{ + "file_name": "00504275ede73591d94a6c1f994fd4856610421c.txt", + "total_words": 625, + "top_n_words": [ + { + "word": "mother", + "count": 38 + }, + { + "word": "the", + "count": 30 + }, + { + "word": "of", + "count": 19 + }, + { + "word": "o", + "count": 16 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "s", + "count": 14 + }, + { + "word": "in", + "count": 13 
+ }, + { + "word": "a", + "count": 11 + }, + { + "word": "mine", + "count": 11 + }, + { + "word": "and", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "Some are very long.", + "length": 19 + }, + { + "text": "Some are very short.", + "length": 20 + }, + { + "text": "Well, we're here to help you.", + "length": 29 + }, + { + "text": "\"Kaddish\" by Allen Ginsberg .", + "length": 29 + }, + { + "text": "Let us know in the comments below.", + "length": 34 + }, + { + "text": "\"Mother to Son\" by Langston Hughes .", + "length": 36 + }, + { + "text": "\"Mother o' Mine\" by Rudyard Kipling .", + "length": 37 + }, + { + "text": "\"To My Mother\" by Christina Rosetti .", + "length": 37 + }, + { + "text": "© Copyright 2010 Mother Nature Network .", + "length": 41 + }, + { + "text": "\"To My Mother\" by Robert Louis Stevenson .", + "length": 42 + }, + { + "text": "\" Do you have a favorite Mother's Day poem?", + "length": 43 + }, + { + "text": "Many address the poet's memories of his mother.", + "length": 47 + }, + { + "text": "Mother Nature Network: Mother's Day song guide .", + "length": 48 + }, + { + "text": "Kasdorf is the second contemporary poet on our list.", + "length": 52 + }, + { + "text": "\" \"What I Learned From My Mother\" by Julia Kasdorf .", + "length": 52 + }, + { + "text": "Others describe the poet's gratitude for his mother.", + "length": 52 + }, + { + "text": "\"Thanking My Mother for Piano Lessons\" by Diane Wakoski .", + "length": 57 + }, + { + "text": "Mother Nature Network: Kids' Mother's Day crafts projects .", + "length": 59 + }, + { + "text": "\" It can be found in \"The Collected Poems of Langston Hughes.", + "length": 61 + }, + { + "text": "Other times, it's a straightforward message of love and gratitude.", + "length": 66 + }, + { + "text": "If I were drowned in the deepest sea, Mother o' mine, O mother o' mine!", + "length": 71 + }, + { + "text": "If I were hanged on the highest hill, Mother o' mine, O mother o' mine!", + "length": 
71 + }, + { + "text": "The Scottish poet evokes childhood memories in this four-line ode to Mom.", + "length": 73 + }, + { + "text": "I know whose love would follow me still, Mother o' mine, O mother o' mine!", + "length": 74 + }, + { + "text": "(Mother Nature Network) -- Mother's Day poems come in all shapes and sizes.", + "length": 75 + }, + { + "text": "I know whose tears would come down to me, Mother o' mine, O mother o' mine!", + "length": 75 + }, + { + "text": "Here's a list of selected works to get you started with Mother's Day poems: .", + "length": 77 + }, + { + "text": "It was published as part of a collection, \"Kaddish and Other Poems: 1958-1960.", + "length": 78 + }, + { + "text": "And may you happy live, And long us bless; Receiving as you give Great happiness.", + "length": 81 + }, + { + "text": "To-day's your natal day; Sweet flowers I bring: Mother, accept, I pray My offering.", + "length": 83 + }, + { + "text": "\" Its title refers to the traditional Jewish prayer recited during times of mourning.", + "length": 85 + }, + { + "text": "If I were damned of body and soul, I know whose prayers would make me whole, Mother o' mine, O mother o' mine!", + "length": 110 + }, + { + "text": "Sometimes the mother-child relationship is complicated and the poet discusses the good times along with the bad.", + "length": 112 + }, + { + "text": "The Nobel laureate, who lived from 1865 to 1936, wrote about the undying love of a mother in this 11-line poem: .", + "length": 113 + }, + { + "text": "If nothing else, the poet almost always acknowledges the significant role a mother plays in the lives of her children.", + "length": 118 + }, + { + "text": "Ginsberg, one of the leading voices of the Beat Generation, wrote this lengthy poem following the 1956 death of his mother.", + "length": 123 + }, + { + "text": "It appeared \"A Child's Garden of Verses,\" a collection of 65 poems by Stevenson first published in 1885 under the title \"Penny Whistles.", + "length": 136 + 
}, + { + "text": "\" You too, my mother, read my rhymes For love of unforgotten times, And you may chance to hear once more The little feet along the floor.", + "length": 137 + }, + { + "text": "She talks about how her mother taught her to comfort those in mourning, to offer healing and \"the blessing of your voice, your chaste touch.", + "length": 140 + }, + { + "text": "Rosetti, a 19th century English poet best known for her lengthy poem called \"Goblin's Market,\" wrote this short piece about her mother in 1842: .", + "length": 145 + }, + { + "text": "Needless-to-say, with Mom being portrayed so many different ways in poetry, there are several avenues for approaching a selection of poems appropriate for Mother's Day.", + "length": 168 + }, + { + "text": "This lesser-known piece by the Harlem Renaissance writer takes the perspective of the mother speaking to her son and telling him that \"Life for me ain't been no crystal stair.", + "length": 175 + }, + { + "text": "Wakoski, a contemporary poet who counts Allen Ginsberg among her influences, writes of the \"beauty that can come from even an ugly past\" in this poem that recounts, among other things, the financial struggles her mother accepted in order to make sure she could pay for her child's piano lessons.", + "length": 295 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4958558678627014 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:59.028176616Z", + "first_section_created": "2025-12-23T09:34:59.030102794Z", + "last_section_published": "2025-12-23T09:34:59.030289302Z", + "all_results_received": "2025-12-23T09:34:59.089770516Z", + "output_generated": "2025-12-23T09:34:59.089945523Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:59.030102794Z", + "publish_time": 
"2025-12-23T09:34:59.030289302Z", + "first_worker_start": "2025-12-23T09:34:59.030761421Z", + "last_worker_end": "2025-12-23T09:34:59.088882Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:34:59.03072452Z", + "start_time": "2025-12-23T09:34:59.030794422Z", + "end_time": "2025-12-23T09:34:59.030865125Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:59.030932Z", + "start_time": "2025-12-23T09:34:59.031072Z", + "end_time": "2025-12-23T09:34:59.088882Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:59.030716319Z", + "start_time": "2025-12-23T09:34:59.030787722Z", + "end_time": "2025-12-23T09:34:59.030874426Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:59.030693618Z", + "start_time": "2025-12-23T09:34:59.030761421Z", + "end_time": "2025-12-23T09:34:59.030788122Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3513, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/00506716b79f302b29c8843a4941a93a30607283.json b/data/output/00506716b79f302b29c8843a4941a93a30607283.json new file mode 100644 index 0000000..d7a81a6 --- /dev/null +++ b/data/output/00506716b79f302b29c8843a4941a93a30607283.json @@ -0,0 +1,322 @@ +{ + "file_name": "00506716b79f302b29c8843a4941a93a30607283.txt", + "total_words": 744, + "top_n_words": [ + { + "word": "the", + "count": 35 + }, + { + "word": "a", + "count": 34 + }, + { + "word": "in", + "count": 22 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "abdominal", + "count": 16 + }, + { + "word": "baby", + "count": 16 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "was", + "count": 15 + }, + { + "word": "pregnancies", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "7kg).", + "length": 5 + }, + { + "text": "mother.", + "length": 7 + }, + { + "text": "mother dying.", + "length": 13 + }, + { + "text": "Unlike with other ectopic .", + "length": 27 + }, + { + "text": "The little girl weighed 3lbs 7oz (1.", + "length": 36 + }, + { + "text": "Babies born after abdominal pregnancies .", + "length": 41 + }, + { + "text": "They can also result in anaemia and infection.", + "length": 46 + }, + { + "text": "The baby was successfully delivered during surgery.", + "length": 51 + }, + { + "text": "An abdominal pregnancy is a form of ectopic pregnancy.", + "length": 54 + }, + { + "text": "result of there being no amniotic fluid as a protective buffer.", + "length": 63 + }, + { + "text": "But they are dangerous as they 
can cause severe bleeding in the .", + "length": 65 + }, + { + "text": "an abdominal pregnancy but there is also an increased risk of the .", + "length": 67 + }, + { + "text": "They treated her for these conditions and her symptoms eased a little.", + "length": 70 + }, + { + "text": "have a high rate of birth defects because they can be compressed as a .", + "length": 71 + }, + { + "text": "pregnancies, there is a chance that a viable baby will be born following .", + "length": 74 + }, + { + "text": "A 22-year-old has given birth to a healthy baby girl who grew outside her womb.", + "length": 79 + }, + { + "text": "Abdominal pregnancies are dangerous as they can cause severe bleeding in the mother.", + "length": 84 + }, + { + "text": "If this does not happen, the baby will die and can become calcified inside the mother.", + "length": 86 + }, + { + "text": "If this does not happen, the baby will die and can become calcified inside the mother.", + "length": 86 + }, + { + "text": "Abdominal pregnancies are thought to account for about one in every 10,000 pregnancies.", + "length": 87 + }, + { + "text": "Abdominal pregnancies are thought to account for about one in every 10,000 pregnancies.", + "length": 87 + }, + { + "text": "She was also noted to have a normal, but empty, womb and normal ovaries and fallopian tubes.", + "length": 92 + }, + { + "text": "She had already had two antenatal check-ups after which she was told the pregnancy was progressing normally.", + "length": 108 + }, + { + "text": "She also reported having difficulty urinating, according to a report of her case in the journal BioMed Central.", + "length": 111 + }, + { + "text": "However, a second scan two days later showed that the woman’s baby was actually floating in her abdominal cavity.", + "length": 115 + }, + { + "text": "The woman, in Tanzania, only discovered when she was 32 weeks pregnant that her baby was developing in her abdomen.", + "length": 115 + }, + { + "text": "She added: 'It's very 
rare, but you have to keep it in your mind when examining a pregnant woman who has abdominal pain.", + "length": 120 + }, + { + "text": "A woman with an abdominal pregnancy will not be able to give birth naturally and the baby will have to be delivered surgically.", + "length": 127 + }, + { + "text": "A woman with an abdominal pregnancy will not be able to give birth naturally and the baby will have to be delivered surgically.", + "length": 127 + }, + { + "text": "Abdominal pregnancies are rare forms of ectopic pregnancies which occur when the baby implants in the abdomen instead of the womb.", + "length": 130 + }, + { + "text": "Ectopic pregnancies occur when a fertilised egg implants outside the womb - usually in the fallopian tubes, but occasionally in the abdomen.", + "length": 140 + }, + { + "text": "The mother had to be given a blood transfusion because she had severe anaemia but she and the baby were both discharged from hospital healthy.", + "length": 142 + }, + { + "text": "Abdominal pregnancies usually occur as a result of a fallopian tube rupturing or expelling a fertilised egg that has first implanted in the tube.", + "length": 145 + }, + { + "text": "It is thought that the fertilised egg initially implanted in her fallopian tube from where it was expelled, allowing it to implant again in her abdomen.", + "length": 152 + }, + { + "text": "The patient was found to be carrying a viable baby even though it was not in a protective amniotic sac and was not surrounded by the normal amniotic fluid.", + "length": 155 + }, + { + "text": "They are often not picked up on ultrasound scans and usually occur as a result of a fallopian tube rupturing or expelling a fertilised egg that has implanted in the tube.", + "length": 170 + }, + { + "text": "She had an ultrasound scan in Mwanza which appeared to be normal and doctors assumed the pregnancy was textbook but that the mother had anaemia and a urinary tract infection.", + "length": 174 + }, + { + "text": "Unlike 
with other ectopic pregnancies, there is a chance that a viable baby will be born following an abdominal pregnancy but there is also an increased risk of the mother dying.", + "length": 178 + }, + { + "text": "'I've seen maybe four or five abdominal pregnancies over the course of 25 years,' Dr Jill Rabin, chief of ambulatory care, obstetrics and gynecology at Long Island Jewish Medical Center in New York, told Live Science.", + "length": 217 + }, + { + "text": "A 22-year-old has given birth to a healthy baby that grew outside her womb, in her abdomen (file picture) The woman was referred to a hospital in Mwanza, Tanzania, after complaining of abdominal pain and noticing that her baby was not moving very much.", + "length": 252 + }, + { + "text": "The baby girl was delivered surgically and she and her mother were both released from hospital healthy after the operation (file picture) Babies born after abdominal pregnancies have a high rate of birth defects because they can be compressed as a result of there being no amniotic fluid as a protective buffer.", + "length": 311 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.6285825967788696 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:34:59.531063825Z", + "first_section_created": "2025-12-23T09:34:59.5329114Z", + "last_section_published": "2025-12-23T09:34:59.533116808Z", + "all_results_received": "2025-12-23T09:34:59.600647449Z", + "output_generated": "2025-12-23T09:34:59.600843157Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:34:59.5329114Z", + "publish_time": "2025-12-23T09:34:59.533116808Z", + "first_worker_start": "2025-12-23T09:34:59.533596428Z", + "last_worker_end": "2025-12-23T09:34:59.599575Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": 
"sentencesort", + "receive_time": "2025-12-23T09:34:59.533590727Z", + "start_time": "2025-12-23T09:34:59.533692632Z", + "end_time": "2025-12-23T09:34:59.533772135Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:34:59.533837Z", + "start_time": "2025-12-23T09:34:59.533982Z", + "end_time": "2025-12-23T09:34:59.599575Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:34:59.533566926Z", + "start_time": "2025-12-23T09:34:59.53364663Z", + "end_time": "2025-12-23T09:34:59.533746734Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:34:59.533524425Z", + "start_time": "2025-12-23T09:34:59.533596428Z", + "end_time": "2025-12-23T09:34:59.533631929Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4320, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/005073ebe375befd66b69e593179259ba32aab41.json b/data/output/005073ebe375befd66b69e593179259ba32aab41.json new file mode 100644 index 0000000..b6258fe --- /dev/null +++ b/data/output/005073ebe375befd66b69e593179259ba32aab41.json @@ -0,0 +1,382 @@ +{ + "file_name": "005073ebe375befd66b69e593179259ba32aab41.txt", + "total_words": 668, + "top_n_words": [ + { + "word": "the", + "count": 35 + }, + { + "word": "to", + "count": 27 + }, + { + "word": "work", + "count": 17 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "her", + "count": 12 + }, + { + "word": "was", + "count": 11 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "she", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "work.", + "length": 5 + }, + { + "text": "Asda.", + "length": 5 + }, + { + "text": "extent.", + "length": 7 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Unemployed .", + "length": 12 + }, + { + "text": "Nick Mcdermott .", + "length": 16 + }, + { + "text": "The case continues.", + "length": 19 + }, + { + "text": "Lawyers for the pair .", + "length": 22 + }, + { + "text": "03:10 EST, 26 June 2012 .", + "length": 25 + }, + { + "text": "04:55 EST, 27 June 2012 .", + "length": 25 + }, + { + "text": "45-a-week jobseeker’s allowance.", + "length": 34 + }, + { + "text": "is equivalent to ‘slave labour’.", + "length": 36 + }, + { + "text": "Nathalie Lieven, QC, appearing for .", + "length": 36 + }, + { + "text": "The Government is fighting her claim .", + "length": 38 + }, + { + "text": "Miss Reilly, 
who graduated in geology .", + "length": 39 + }, + { + "text": "suggestion of her Jobcentre Plus adviser.", + "length": 41 + }, + { + "text": "However, she says, that never materialised.", + "length": 43 + }, + { + "text": "experience in the public, private and charity sectors.", + "length": 54 + }, + { + "text": "Human Rights, which prohibits forced labour and slavery.", + "length": 56 + }, + { + "text": "mechanic Jamieson Wilson, 41, is also challenging the legality of .", + "length": 67 + }, + { + "text": "attending a retail jobs ‘open day’ in October last year at the .", + "length": 68 + }, + { + "text": "from Birmingham University in 2010, was placed on the scheme after .", + "length": 68 + }, + { + "text": "work did not contribute to (Miss Reilly’s) search for work to any .", + "length": 69 + }, + { + "text": "Miss Reilly, said her placement at Poundland, near her home in Kings .", + "length": 70 + }, + { + "text": "programme, saying it violates Article 4 of the European Convention on .", + "length": 71 + }, + { + "text": "schemes, championed by Employment Minister Chris Grayling, aim to help .", + "length": 72 + }, + { + "text": "and strongly denies that its flagship work programme, which has placed .", + "length": 72 + }, + { + "text": "another Government work scheme that compels the jobless to take unpaid .", + "length": 72 + }, + { + "text": "are seeking a judicial review into the Department for Work and Pensions .", + "length": 73 + }, + { + "text": "250,000 young people over the next two years through training and unpaid .", + "length": 74 + }, + { + "text": "thousands of jobseekers on unpaid placements since its launch last year, .", + "length": 74 + }, + { + "text": "Placement-providers include chains such as Tesco, Sainsbury’s, Argos and .", + "length": 76 + }, + { + "text": "Heath, Birmingham, involved carrying out ‘menial’ tasks, adding: ‘Such .", + "length": 78 + }, + { + "text": "Prestigious: Cait Reilly, 22, is a geology graduate from Birmingham 
University .", + "length": 80 + }, + { + "text": "Miss Lieven added: ‘These claimants have an entitlement to subsistence-level benefits.", + "length": 88 + }, + { + "text": "’ The court heard that Mr Wilson, also from the Midlands, had been unemployed since 2008.", + "length": 91 + }, + { + "text": "Paul Nicholls, QC, appearing for the DWP, argued that both legal challenges were ‘wrong in law’.", + "length": 100 + }, + { + "text": "Only after attending was she told she had to undertake a period of training or risk losing her benefits.", + "length": 104 + }, + { + "text": "After refusing, he had his benefits cut, and now faces the loss of his jobseeker’s allowance for six months.", + "length": 110 + }, + { + "text": "’ Miss Reilly had undertaken the work under ‘menace of penalty’ in breach of her human rights, she added.", + "length": 111 + }, + { + "text": "He was told last November he would be required to undertake up to six months of unpaid work cleaning furniture.", + "length": 111 + }, + { + "text": "Unhappy: Cait Reilly is shown outside the Poundland store in Birmingham where she claims she was forced to work .", + "length": 113 + }, + { + "text": "Cait Reilly, pictured, yesterday asked the High Court to declare the Government¿s back-to-work programme unlawful .", + "length": 116 + }, + { + "text": "The court was told Poundland is a successful firm with a net turnover of £500million, and her placement did not contribute to the public interest.", + "length": 147 + }, + { + "text": "'Taking away these subsistence-level benefits for up to six months is in many ways just as fundamental as being detained as an immigrant in prison.", + "length": 147 + }, + { + "text": "Legal challenge: Cait Reilly, pictured, was 'forced' to leave her voluntary work at a museum to stack shelves at Poundland or risk losing her benefits .", + "length": 152 + }, + { + "text": "He said the purpose of the schemes was to assist the long-term jobless to better equip themselves for work, and 
thousands of people in different situations had benefited.", + "length": 170 + }, + { + "text": "The 23-year-old said she had to give up a voluntary post in a museum to take the placement but was promised a job interview if she completed two weeks training at Poundland.", + "length": 173 + }, + { + "text": "A graduate made to work for her jobless benefits as a shelf stacker in Poundland yesterday claimed the scheme amounted to ‘forced labour’ which breached her human rights.", + "length": 174 + }, + { + "text": "Cait Reilly yesterday asked the High Court to declare the Government’s back-to-work programme unlawful after she was required to work in the budget store or risk losing her £53.", + "length": 180 + }, + { + "text": "She also accused the Government of ‘seemingly sub-delegating to private sector providers’ how the schemes were run, including whether those who refused unpaid work should lose benefits.", + "length": 189 + }, + { + "text": "Miss Lieven told Mr Justice Foskett the way in which the back-to-work programme was administered was ‘blatantly unlawful’, and had put thousands at risk of unfairly losing their benefits.", + "length": 191 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5886648893356323 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:00.033906932Z", + "first_section_created": "2025-12-23T09:35:00.034303248Z", + "last_section_published": "2025-12-23T09:35:00.034522457Z", + "all_results_received": "2025-12-23T09:35:00.103638862Z", + "output_generated": "2025-12-23T09:35:00.103800368Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:00.034303248Z", + "publish_time": "2025-12-23T09:35:00.034522457Z", + "first_worker_start": "2025-12-23T09:35:00.034984375Z", + "last_worker_end": "2025-12-23T09:35:00.102744Z", + 
"total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:00.034921773Z", + "start_time": "2025-12-23T09:35:00.034984375Z", + "end_time": "2025-12-23T09:35:00.035066779Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:00.035241Z", + "start_time": "2025-12-23T09:35:00.035407Z", + "end_time": "2025-12-23T09:35:00.102744Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:00.034930873Z", + "start_time": "2025-12-23T09:35:00.035009677Z", + "end_time": "2025-12-23T09:35:00.03509758Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:00.034972775Z", + "start_time": "2025-12-23T09:35:00.035046578Z", + "end_time": "2025-12-23T09:35:00.03508358Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 
0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4150, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/00508bfc7b036d23a6d767716d71d698a5129e8d.json b/data/output/00508bfc7b036d23a6d767716d71d698a5129e8d.json new file mode 100644 index 0000000..b82e183 --- /dev/null +++ b/data/output/00508bfc7b036d23a6d767716d71d698a5129e8d.json @@ -0,0 +1,206 @@ +{ + "file_name": "00508bfc7b036d23a6d767716d71d698a5129e8d.txt", + "total_words": 231, + "top_n_words": [ + { + "word": "the", + "count": 15 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "to", + "count": 9 + }, + { + "word": "were", + "count": 7 + }, + { + "word": "a", + "count": 5 + }, + { + "word": "in", + "count": 5 + }, + { + "word": "school", + "count": 4 + }, + { + "word": "students", + "count": 4 + }, + { + "word": "at", + "count": 3 + }, + { + "word": "chemistry", + "count": 3 + } + ], + "sorted_sentences": [ + { + "text": "It can also cause eye and skin irritation.", + "length": 42 + }, + { + "text": "Everyone on site was escorted to the middle school building.", + "length": 60 + }, + { + "text": "The hospitalized patients were also released and are said to be doing 'fine'.", + "length": 77 + }, + { + "text": "The chemistry lab in question will remain closed as investigation and clean-up continues.", + "length": 89 + }, + { + "text": "Shortly after 9am students were given the go-ahead to return to classrooms and resume their schedule.", + "length": 101 + }, + { + "text": "They had reportedly been conducting tests with lauric acid which can cause respiratory problems if inhaled.", + "length": 107 + }, + { + "text": "Regardless of the incident, students had been scheduled for early release Monday due to the approaching snow storm.", + "length": 115 + }, + { + "text": "Twelve students and five faculty members who were 
in the chemistry lab at the time were taken to hospital for precautionary exams.", + "length": 130 + }, + { + "text": "More than 1,000 students and staff members were evacuated from a high school in New Jersey this morning after a chemistry lab experiment went wrong.", + "length": 148 + }, + { + "text": "The substance was being heated to show changes in temperature and its believed a contaminant in a beaker created a bad reaction, triggering plumes of smoke.", + "length": 156 + }, + { + "text": "The blunder, which took place at Kingsway Regional High School around 7:40am, released potentially toxic fumes into the air and emergency services were called out to inspect the area.", + "length": 183 + }, + { + "text": "The blunder, which took place at Kingsway Regional High School (pictured) around 7:40am, released potentially toxic fumes into the air and emergency services were called out to inspect the area .", + "length": 195 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8717267513275146 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:00.535280379Z", + "first_section_created": "2025-12-23T09:35:00.535629193Z", + "last_section_published": "2025-12-23T09:35:00.535812101Z", + "all_results_received": "2025-12-23T09:35:00.593619947Z", + "output_generated": "2025-12-23T09:35:00.593728151Z", + "total_processing_time_ms": 58, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 57, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:00.535629193Z", + "publish_time": "2025-12-23T09:35:00.535812101Z", + "first_worker_start": "2025-12-23T09:35:00.536312221Z", + "last_worker_end": "2025-12-23T09:35:00.59284Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:00.536324721Z", + "start_time": "2025-12-23T09:35:00.536367923Z", + "end_time": 
"2025-12-23T09:35:00.536393624Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:00.536531Z", + "start_time": "2025-12-23T09:35:00.536663Z", + "end_time": "2025-12-23T09:35:00.59284Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:00.536331822Z", + "start_time": "2025-12-23T09:35:00.536377924Z", + "end_time": "2025-12-23T09:35:00.536423825Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:00.536256519Z", + "start_time": "2025-12-23T09:35:00.536312221Z", + "end_time": "2025-12-23T09:35:00.536326221Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1414, + 
"slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/0050929aa21ffe7b7659400360bf9373505ef8b6.json b/data/output/0050929aa21ffe7b7659400360bf9373505ef8b6.json new file mode 100644 index 0000000..3ec9279 --- /dev/null +++ b/data/output/0050929aa21ffe7b7659400360bf9373505ef8b6.json @@ -0,0 +1,250 @@ +{ + "file_name": "0050929aa21ffe7b7659400360bf9373505ef8b6.txt", + "total_words": 502, + "top_n_words": [ + { + "word": "the", + "count": 25 + }, + { + "word": "a", + "count": 20 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "is", + "count": 14 + }, + { + "word": "he", + "count": 13 + }, + { + "word": "has", + "count": 11 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "i", + "count": 8 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "at", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "'He is a real talisman for this team.", + "length": 37 + }, + { + "text": "'Hopefully it is not going to be the case.", + "length": 42 + }, + { + "text": "We are trying to build something here,' said Rodgers.", + "length": 53 + }, + { + "text": "Rodgers insists the youngster is not being affected by the debate over his future.", + "length": 82 + }, + { + "text": "He has been a great example of the progress that a lot of the players have made here.", + "length": 85 + }, + { + "text": "Liverpool want to tie down England forward Raheem Sterling to a new long-term contract .", + "length": 88 + }, + { + "text": "'He has gone from a youth team player to one of the top young players in European football.", + "length": 91 + }, + { + "text": "Sterling is thwarted at Old Trafford once again by Manchester United goalkeeper David de Gea .", + "length": 94 + }, + { + "text": "Brendan Rodgers is coming under increasing pressure at Liverpool after a woeful start to the season .", + "length": 101 + }, + { + "text": "Sterling has been attracting interest from Bundesliga champions and Champions League favourites Bayern .", + 
"length": 104 + }, + { + "text": "'The club, I am sure, will work very hard (on his contract) and Raheem is happy here, he loves being a Liverpool player.", + "length": 120 + }, + { + "text": "Liverpool manager Brendan Rodgers is relaxed about Raheem Sterling's future despite reported interest from Bayern Munich.", + "length": 121 + }, + { + "text": "'He leaves his representatives to work with the club to hopefully organise the deal for him and he has no real influence in that.", + "length": 129 + }, + { + "text": "'The kid has been a real catalyst for us in terms of our attacking threat so I don't think there is anything which has affected him.", + "length": 132 + }, + { + "text": "'All I ask him to do is concentrate on playing football and the performances will come and he has shown consistently he has been a real threat.", + "length": 143 + }, + { + "text": "'We were on a path for a couple of years and we have had to come off that path since the summer having lost some players and have had to regroup.", + "length": 145 + }, + { + "text": "'I am sure between them and the representatives we can get a deal done which will see him go on to continue to develop what is an exciting career.", + "length": 146 + }, + { + "text": "'The kid's played in a front role for me at the weekend - a role he has played before as a youngster - but I don't think it is affecting him one bit.", + "length": 149 + }, + { + "text": "'I will never be surprised teams will be linked with Raheem as he is such a big talent at 20, a wonderful young player, but I am quite relaxed about it.", + "length": 152 + }, + { + "text": "' Sterling found himself in the spotlight for the wrong reasons at the weekend after missing Liverpool's best chances in the 3-0 defeat at Manchester United.", + "length": 157 + }, + { + "text": "'I think Raheem has been brilliant, absolutely outstanding,' added Rodgers, ahead of a crucial Capital One Cup quarter-final at Championship leaders Bournemouth.", + "length": 161 + 
}, + { + "text": "'OK, he maybe could have got a goal or so but he puts defenders on the back foot, he is intelligent in his game, has running ability, and is getting himself into some wonderful positions.", + "length": 187 + }, + { + "text": "The Reds are trying to tie the 20-year-old to a new long-term contract and can ill-afford to lose their next potential world-class star following the departure of striker Luis Suarez to Barcelona in the summer.", + "length": 210 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.41526249051094055 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:01.035936997Z", + "first_section_created": "2025-12-23T09:35:01.036224209Z", + "last_section_published": "2025-12-23T09:35:01.036411116Z", + "all_results_received": "2025-12-23T09:35:01.097348889Z", + "output_generated": "2025-12-23T09:35:01.097530197Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:01.036224209Z", + "publish_time": "2025-12-23T09:35:01.036411116Z", + "first_worker_start": "2025-12-23T09:35:01.036931238Z", + "last_worker_end": "2025-12-23T09:35:01.096486Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:01.036875935Z", + "start_time": "2025-12-23T09:35:01.036944138Z", + "end_time": "2025-12-23T09:35:01.037008141Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:01.0371Z", + "start_time": "2025-12-23T09:35:01.037229Z", + "end_time": "2025-12-23T09:35:01.096486Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:01.036892036Z", + "start_time": "2025-12-23T09:35:01.036956739Z", + 
"end_time": "2025-12-23T09:35:01.037026141Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:01.036854734Z", + "start_time": "2025-12-23T09:35:01.036931238Z", + "end_time": "2025-12-23T09:35:01.036968839Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2751, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/0050b976353b5fa92997493bba51838fe287ec55.json b/data/output/0050b976353b5fa92997493bba51838fe287ec55.json new file mode 100644 index 0000000..477a1c3 --- /dev/null +++ b/data/output/0050b976353b5fa92997493bba51838fe287ec55.json @@ -0,0 +1,294 @@ +{ + "file_name": "0050b976353b5fa92997493bba51838fe287ec55.txt", + "total_words": 529, + "top_n_words": [ + { + "word": "the", + "count": 38 + }, + { + 
"word": "a", + "count": 17 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "home", + "count": 13 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "by", + "count": 9 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "s", + "count": 8 + }, + { + "word": "is", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Ryan Gorman .", + "length": 13 + }, + { + "text": "09:05 EST, 16 September 2013 .", + "length": 30 + }, + { + "text": "12:57 EST, 16 September 2013 .", + "length": 30 + }, + { + "text": "All added up, the initial estimate is about $1.", + "length": 47 + }, + { + "text": "6million, but those initial estimates often rise.", + "length": 49 + }, + { + "text": "An overhauled kitchen and bathroom come to $65,000.", + "length": 51 + }, + { + "text": "In the Dolores Heights neighborhood of San Francisco, CA.", + "length": 57 + }, + { + "text": "To nurture someone’s green thumb, a $60,000 greenhouse.", + "length": 57 + }, + { + "text": "Lucky dog: Mr Zuckerberg, wife Priscilla and their dog own two homes in the Bay Area .", + "length": 86 + }, + { + "text": "First reported by the San Francisco Chronicle, the repairs aim to transform the house.", + "length": 86 + }, + { + "text": "The first family of Facebook also owns a mansion in suburban Palo Alto – near company headquarters.", + "length": 101 + }, + { + "text": "The 5,542-square-foot, four-bedroom home was bought by Mr Zuckerberg and wife Priscilla Chan last fall.", + "length": 103 + }, + { + "text": "The $10million home recently bought by Facebook founder Mark Zuckerberg is undergoing major renovations.", + "length": 104 + }, + { + "text": "Fit for a queen: Facebook COO palatial $11million Palo Alto home stands on the grounds of a former bungalow .", + "length": 109 
+ }, + { + "text": "Milennial mogul: Mr Zuckerberg's worth is in the billions, making him one of the richest people in San Francisco .", + "length": 114 + }, + { + "text": "Finally, a new exterior wall and stairway, as well as further earthquake-proofing, are expected to tally about $30,000.", + "length": 119 + }, + { + "text": "Stripped to the studs, the house’s garage is even getting an upgrade – with a turntable pad, the Chronicle reported.", + "length": 120 + }, + { + "text": "Additions to multiple floors, new windows, a new roof and custom landscaping are estimated to come in at a cool $750,000.", + "length": 121 + }, + { + "text": "Supervisor Scott Weiner leaked to the paper his knowledge of the home’s buyers, but lamented not running into them yet.", + "length": 121 + }, + { + "text": "Built on the grounds of a former bungalow, the total price tag for the 9,210 square foot home came out to over $11million.", + "length": 122 + }, + { + "text": "That permit also details remodelling of the second, third and fourth floors, as well as a new bathroom on the second floor.", + "length": 123 + }, + { + "text": "Spokespersons for both Zuckerberg and Facebook declined to comment to the Chronicle and could not be reached by MailOnline.", + "length": 123 + }, + { + "text": ", the home is slated for over $1 million in improvements, according to city building permit requests filed by a contractor.", + "length": 123 + }, + { + "text": "A first-floor office, media room, half bathroom, mud room, laundry room, wine room and wet bar are estimated to cost $720,000.", + "length": 126 + }, + { + "text": "Home away from home: Facebook CEO Mark Zuckerberg's second home, in toney Dolores Heights, is undergoing almost $2million in renovations .", + "length": 138 + }, + { + "text": "Though not publicly discussed by the recently-married college sweethearts, the home’s new owners were confirmed to the newspaper by a local politician.", + "length": 153 + }, + { + "text": "Facebook COO 
Sheryl Sandberg’s just-completed home in Menlo Park features an indoor living waterfall, solar-paneled roof, movie theatre and a wine cellar.", + "length": 156 + }, + { + "text": "The fixer-upper was ‘purchased through a holding company run by a lawyer who specializes in forming trusts for high-end clients,’ reported the Chronicle.", + "length": 157 + }, + { + "text": "‘Workers at the site tell us they wouldn't be surprised if the final tab comes to several million more,’ said the Chronicle, adding the gut renovation will take about a year.", + "length": 178 + }, + { + "text": "Close to the popular Dolores Park, the billionaire power couple’s home away from home is tucked neatly into the hilly neighbourhood on the edge of the Mission District, according to reports.", + "length": 192 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.609862208366394 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:01.53719464Z", + "first_section_created": "2025-12-23T09:35:01.537601756Z", + "last_section_published": "2025-12-23T09:35:01.537790564Z", + "all_results_received": "2025-12-23T09:35:01.603179518Z", + "output_generated": "2025-12-23T09:35:01.603343224Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:01.537601756Z", + "publish_time": "2025-12-23T09:35:01.537790564Z", + "first_worker_start": "2025-12-23T09:35:01.538266483Z", + "last_worker_end": "2025-12-23T09:35:01.602264Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:01.538235582Z", + "start_time": "2025-12-23T09:35:01.538297484Z", + "end_time": "2025-12-23T09:35:01.538357487Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": 
"2025-12-23T09:35:01.538509Z", + "start_time": "2025-12-23T09:35:01.538658Z", + "end_time": "2025-12-23T09:35:01.602264Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:01.538289384Z", + "start_time": "2025-12-23T09:35:01.538367587Z", + "end_time": "2025-12-23T09:35:01.538459191Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:01.53819578Z", + "start_time": "2025-12-23T09:35:01.538266483Z", + "end_time": "2025-12-23T09:35:01.538301485Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3169, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/0050c34aee5292974432f1de7f22e9f142928020.json 
b/data/output/0050c34aee5292974432f1de7f22e9f142928020.json new file mode 100644 index 0000000..f39d185 --- /dev/null +++ b/data/output/0050c34aee5292974432f1de7f22e9f142928020.json @@ -0,0 +1,294 @@ +{ + "file_name": "0050c34aee5292974432f1de7f22e9f142928020.txt", + "total_words": 641, + "top_n_words": [ + { + "word": "their", + "count": 29 + }, + { + "word": "the", + "count": 23 + }, + { + "word": "were", + "count": 17 + }, + { + "word": "relationship", + "count": 16 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "status", + "count": 12 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "who", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Spare us!", + "length": 9 + }, + { + "text": "Lucy Waterlow .", + "length": 15 + }, + { + "text": "We are stronger than ever.", + "length": 26 + }, + { + "text": "Through all the good times and some bad...", + "length": 42 + }, + { + "text": "What does your profile say about your relationship?", + "length": 51 + }, + { + "text": "' Coleen's Tweet as above would also fall under 'high disclosure' category.", + "length": 75 + }, + { + "text": "However, the next part of their study found it doesn't make you a better friend.", + "length": 80 + }, + { + "text": "Those who had a profile picture with the partner were deemed to be more content .", + "length": 81 + }, + { + "text": "Those who had the most 'high disclosure' status updates were particularly disliked.", + "length": 83 + }, + { + "text": "They were also asked to rate how much they liked the person whose profile they were viewing.", + "length": 92 + }, + { + "text": "Researchers found people disliked status updates were people talked too much about their partner .", + "length": 98 + }, + { + "text": "They asked more than 200 people who were in relationships to give them access to their Facebook profile.", + "length": 104 + }, 
+ { + "text": "They also made up status updates revealing various levels of disclosure about their fictional relationship.", + "length": 107 + }, + { + "text": "Celebrities including Wag Coleen Rooney are well-known for bragging about their other halves on social media.", + "length": 109 + }, + { + "text": "However, those who disclosed too much about their relationship in their status updates were the least popular.", + "length": 110 + }, + { + "text": "High disclosure included status updates such as 'Pining away for Jordan… I just love you so much I can't stand it!", + "length": 116 + }, + { + "text": "This is what the researchers discovered for their paper which will be published in the journal Personal Relationships.", + "length": 118 + }, + { + "text": "People were then asked to judge each fictional Facebook profile and decide which were the most satisfied in their relationship.", + "length": 127 + }, + { + "text": "So postings like Coleen's may make people perceive a relationship as successful - but that doesn't mean they'll like you for it.", + "length": 128 + }, + { + "text": "So in love, so annoying: Selfies with your partner and gushing updates, like this one recently posted by Coleen Rooney, are disliked .", + "length": 134 + }, + { + "text": "Once again, those with profile pictures of their partners who displayed their relationship status were judged to be the most satisfied.", + "length": 135 + }, + { + "text": "Therefore the researchers concluded that a relationship is perceived to be 'better' if someone makes a show of it on their Facebook page.", + "length": 137 + }, + { + "text": "Those who had profile pictures with their partners and who displayed their relationship status were deemed to have 'higher quality' relationships.", + "length": 146 + }, + { + "text": "' But while such public declarations of affection might make boost your partner's ego, they won't help your own popularity, according to new research.", + "length": 150 + }, + { + "text": 
"The researchers were then asked to judge how satisfied and committed they thought each person was in their relationship, based on their Facebook profile.", + "length": 153 + }, + { + "text": "Earlier this month, Coleen Tweeted a selfie of her and husband Wayne on their wedding anniversary with the comment: 'Six years ago today I became Mrs Rooney!", + "length": 157 + }, + { + "text": "It seems frequently boosting about your love - which can also be done by changing your relationship status on Facebook - is a means of making your friends mad.", + "length": 159 + }, + { + "text": "The researchers next created a variety of fictional Facebook profiles, with some using profile pictures of couples and displaying their current relationship status.", + "length": 164 + }, + { + "text": "Each of the people who had shared their Facebook pages with the researchers were also asked to answer a questionnaire on how satisfied they were in their relationship.", + "length": 167 + }, + { + "text": "They found that more than a quarter had profile pictures showing them with their partner and 70 per cent had changed their relationship status to reflect their love life.", + "length": 170 + }, + { + "text": "Low disclosure included statuses that were about a relationship but impersonal eg 'I love my girlfriend' while no relationship disclosure covered statuses like 'I've lost my mobile, email me'.", + "length": 192 + }, + { + "text": "A study published in the Science of Relationships found that people who posted pictures of themselves and their partners looking lovey dovey, or who made gushing status updates about their other half, were disliked by their friends.", + "length": 232 + }, + { + "text": "The answers reflected the assumptions the researchers had made based on the Facebook profiles - the people who had pictures with their partners and shared their relationship status were found to be the most satisfied and committed based on their questionnaire answers.", + "length": 268 + } + 
], + "sentiment": { + "sentiment": "negative", + "score": 0.4415380656719208 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:02.038954303Z", + "first_section_created": "2025-12-23T09:35:02.041088689Z", + "last_section_published": "2025-12-23T09:35:02.041300498Z", + "all_results_received": "2025-12-23T09:35:02.103160408Z", + "output_generated": "2025-12-23T09:35:02.103338616Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:02.041088689Z", + "publish_time": "2025-12-23T09:35:02.041300498Z", + "first_worker_start": "2025-12-23T09:35:02.041713915Z", + "last_worker_end": "2025-12-23T09:35:02.102249Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:02.041694514Z", + "start_time": "2025-12-23T09:35:02.041761717Z", + "end_time": "2025-12-23T09:35:02.04183372Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:02.042197Z", + "start_time": "2025-12-23T09:35:02.042337Z", + "end_time": "2025-12-23T09:35:02.102249Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:02.041687314Z", + "start_time": "2025-12-23T09:35:02.041758716Z", + "end_time": "2025-12-23T09:35:02.04183462Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:02.041652412Z", + "start_time": "2025-12-23T09:35:02.041713915Z", + "end_time": "2025-12-23T09:35:02.041753216Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3970, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/0050c7a3b59f536bece30abfb7f144170daf44ad.json b/data/output/0050c7a3b59f536bece30abfb7f144170daf44ad.json new file mode 100644 index 0000000..57aa185 --- /dev/null +++ b/data/output/0050c7a3b59f536bece30abfb7f144170daf44ad.json @@ -0,0 +1,298 @@ +{ + "file_name": "0050c7a3b59f536bece30abfb7f144170daf44ad.txt", + "total_words": 477, + "top_n_words": [ + { + "word": "the", + "count": 32 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "wreath", + "count": 14 + }, + { + "word": "diamonds", + "count": 10 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "is", + "count": 9 + }, + { + "word": "to", + "count": 9 + }, + { + "word": "for", + "count": 7 + }, + { + "word": "s", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": 
"However, .", + "length": 10 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "experiences.", + "length": 12 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Bianca London .", + "length": 15 + }, + { + "text": "' The bad news?", + "length": 15 + }, + { + "text": "05:34 EST, 4 December 2013 .", + "length": 28 + }, + { + "text": "05:34 EST, 4 December 2013 .", + "length": 28 + }, + { + "text": "Tobias Kormind, co-owner of 77 .", + "length": 32 + }, + { + "text": "That will make the neighbours jealous!", + "length": 38 + }, + { + "text": "can be reattached to next year’s wreath, say the creators.", + "length": 60 + }, + { + "text": "could wish for; plus a very worthwhile investment for the future.", + "length": 65 + }, + { + "text": "Diamonds, said: 'It is exhilarating to participate in curating a .", + "length": 66 + }, + { + "text": "com, is selling the ultimate Christmas wreath for a cool £2,835,000.", + "length": 69 + }, + { + "text": "all is not lost because the stones from the wreath are removable and .", + "length": 70 + }, + { + "text": "endeavor to offer truly exceptional newly launching luxury products and .", + "length": 73 + }, + { + "text": "selection of gems that would create the most sparkling Christmas anyone .", + "length": 73 + }, + { + "text": "One luxury website has the answer, assuming you have £3 million to spare.", + "length": 74 + }, + { + "text": "offer the world’s most expensive Christmas wreath as part of our ongoing .", + "length": 76 + }, + { + "text": "Looking for the ultimate Christmas wreath to make your neighbours green with envy?", + "length": 82 + }, + { + "text": "The dazzling wreath will only last twelve days - that's a cost of £236,250 per day.", + "length": 84 + }, + { + "text": "Marcel Knobil founder of VeryFirstTo, who is selling the wreath, said: 'We are delighted to be able to .", + "length": 104 + }, + { + "text": "Studded with over 40 diamonds and rubies totalling over 138 carats, 
exclusive launches website, VeryFirstTo.", + "length": 108 + }, + { + "text": "His client portfolio includes the Royal households, country clubs, galleries, and film and TV production houses.", + "length": 112 + }, + { + "text": "Alternatively, 77 Diamonds is offering the complimentary service of mounting them into a bespoke designed piece of jewellery.", + "length": 125 + }, + { + "text": "Florist: The 60cm wreath has been created by top Finnish Floral Designer, Pasi Jokinen-Carter, whose clients include the Royal household .", + "length": 138 + }, + { + "text": "Exquisite: The wreath is made up of rare flowers from Finland and once it wilts, the diamonds can be transformed into a bespoke piece of jewellery .", + "length": 148 + }, + { + "text": "A luxury website has unveiled the most expensive Christmas wreath studded with 16 Rubies and 32 diamonds totalling over 138 carats that is being sold for nearly £3m .", + "length": 167 + }, + { + "text": "The 60cm wreath is made up of some of the most luxurious flowers and leaves in the world and is created by world-renowned Finnish Floral Designer, Pasi Jokinen-Carter.", + "length": 167 + }, + { + "text": "' And while the wreath may cost more than the average Briton could ever dream of earning in a lifetime, a donation of £1,000 from the sale will be made to The Prince’s Trust.", + "length": 177 + }, + { + "text": "For this high-end commission Jokien-Carter has used Laurus, Lingonberry and Blueberry stems sourced from his country house in Finland - as they are not commercially available in the UK.", + "length": 185 + }, + { + "text": "' The diamonds and rubies for the wreath are provided by 77 Diamonds, who offer the world's largest selection of loose diamonds, some 300,000, or over 70 per cent, of the world's finest polished diamonds.", + "length": 204 + }, + { + "text": "Speaking about his latest creation, he said: 'I am passionate about my craft and this recent invitation to create an exclusive wreath, using natural 
materials and diamonds, has been an exciting and exceptional project.", + "length": 218 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.43874698877334595 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:02.54205822Z", + "first_section_created": "2025-12-23T09:35:02.543934696Z", + "last_section_published": "2025-12-23T09:35:02.544146505Z", + "all_results_received": "2025-12-23T09:35:02.616114526Z", + "output_generated": "2025-12-23T09:35:02.616269432Z", + "total_processing_time_ms": 74, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 71, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:02.543934696Z", + "publish_time": "2025-12-23T09:35:02.544146505Z", + "first_worker_start": "2025-12-23T09:35:02.544604423Z", + "last_worker_end": "2025-12-23T09:35:02.615259Z", + "total_journey_time_ms": 71, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:02.544662326Z", + "start_time": "2025-12-23T09:35:02.544722128Z", + "end_time": "2025-12-23T09:35:02.544786631Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:02.544875Z", + "start_time": "2025-12-23T09:35:02.545002Z", + "end_time": "2025-12-23T09:35:02.615259Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 70 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:02.544558722Z", + "start_time": "2025-12-23T09:35:02.544616524Z", + "end_time": "2025-12-23T09:35:02.544681627Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:02.544542521Z", + "start_time": "2025-12-23T09:35:02.544604423Z", + "end_time": "2025-12-23T09:35:02.544635825Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + 
"worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 70, + "min_processing_ms": 70, + "max_processing_ms": 70, + "avg_processing_ms": 70, + "median_processing_ms": 70, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2882, + "slowest_section_id": 0, + "slowest_section_time_ms": 71 + } +} diff --git a/data/output/00511a6bc78136a1d9de5c31aa134833d6fe17b8.json b/data/output/00511a6bc78136a1d9de5c31aa134833d6fe17b8.json new file mode 100644 index 0000000..d944015 --- /dev/null +++ b/data/output/00511a6bc78136a1d9de5c31aa134833d6fe17b8.json @@ -0,0 +1,274 @@ +{ + "file_name": "00511a6bc78136a1d9de5c31aa134833d6fe17b8.txt", + "total_words": 363, + "top_n_words": [ + { + "word": "the", + "count": 26 + }, + { + "word": "to", + "count": 14 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "cyclodeo", + "count": 8 + }, + { + "word": "users", + "count": 7 + }, + { + "word": "from", + "count": 6 + }, + { + "word": "and", + "count": 5 + }, + { + "word": "their", + "count": 5 + }, + { + "word": "bike", + "count": 4 + } + ], + 
"sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "com/?", + "length": 5 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Emily Davies .", + "length": 14 + }, + { + "text": "plan their bike rides.", + "length": 22 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "05:09 EST, 8 April 2013 .", + "length": 25 + }, + { + "text": "08:43 EST, 8 April 2013 .", + "length": 25 + }, + { + "text": "page_id=2337\u0026project_id=179 .", + "length": 29 + }, + { + "text": "Each route uploaded to the website .", + "length": 36 + }, + { + "text": "For more information about cyclodeo go to: http://velo-city2013.", + "length": 64 + }, + { + "text": "'I wanted to find a way to share my traveling experiences,' he said.", + "length": 68 + }, + { + "text": "shows the duration, elevation and distance of the journeys to help users .", + "length": 74 + }, + { + "text": "'Cyclists would put their own videos, which share their own cycling rides from all over the world.", + "length": 98 + }, + { + "text": "The internet sharing platform Cyclodeo syncs videos with maps so cyclists can plan journeys thoroughly .", + "length": 104 + }, + { + "text": "One of the routes users can follow is a bike ride down 11th Avenue from 50th Street in New York City, U.", + "length": 104 + }, + { + "text": "Videos uploaded to the Cyclodeo website allow users to see the routes online from the cyclist's perspective .", + "length": 109 + }, + { + "text": "Entrepreneur Samir Bendidam said one of the functions of Cyclodeo was to help users check the safety of routes .", + "length": 112 + }, + { + "text": "Mr Bendida told Mashable he created Cyclodeo as a community platform to help people share their cycling knowledge.", + "length": 114 + }, + { + "text": "Cyclodeo, created by Samir Bendidam, allows web users to take a 
virtual tour of New York and plan future bike rides .", + "length": 117 + }, + { + "text": "As the Cyclodeo videos are playing footage of the journeys, a moving marker highlights the location of the video on a map.", + "length": 122 + }, + { + "text": "' The site only works from desktops at present, but Mr Bendida hopes to develop the service for smartphones and tablets in future.", + "length": 130 + }, + { + "text": "Entrepreneur Samir Bendida has filmed cycle routes around Central Park and along the Hudson River Greenway and uploaded them online.", + "length": 132 + }, + { + "text": "Each section of the video is geo-coded, which means you can click anywhere on the route to show a visualization from the cyclist's perspective.", + "length": 143 + }, + { + "text": "The online service, called Cyclodeo, is synchronized with GPS data, allowing web users to check the safety of journeys they hope to cycle from the comfort of their own home.", + "length": 173 + }, + { + "text": "For those who love the idea of cycling around New York but are put off by the hectic traffic, a Dutch start-up company has created a solution which web users take a bike ride without leaving your sofa.", + "length": 201 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5204793214797974 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:03.044953829Z", + "first_section_created": "2025-12-23T09:35:03.045310644Z", + "last_section_published": "2025-12-23T09:35:03.045555754Z", + "all_results_received": "2025-12-23T09:35:03.143506229Z", + "output_generated": "2025-12-23T09:35:03.143680336Z", + "total_processing_time_ms": 98, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 97, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:03.045310644Z", + "publish_time": "2025-12-23T09:35:03.045555754Z", + "first_worker_start": "2025-12-23T09:35:03.046119276Z", + 
"last_worker_end": "2025-12-23T09:35:03.142502Z", + "total_journey_time_ms": 97, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:03.046178179Z", + "start_time": "2025-12-23T09:35:03.046255882Z", + "end_time": "2025-12-23T09:35:03.046305484Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:03.046387Z", + "start_time": "2025-12-23T09:35:03.046536Z", + "end_time": "2025-12-23T09:35:03.142502Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 95 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:03.046085575Z", + "start_time": "2025-12-23T09:35:03.046153978Z", + "end_time": "2025-12-23T09:35:03.04621258Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:03.046054774Z", + "start_time": "2025-12-23T09:35:03.046119276Z", + "end_time": "2025-12-23T09:35:03.046141177Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 95, + "min_processing_ms": 95, + "max_processing_ms": 95, + "avg_processing_ms": 95, + "median_processing_ms": 95, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 
1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2099, + "slowest_section_id": 0, + "slowest_section_time_ms": 97 + } +} diff --git a/data/output/00512126d65bf2a36801e4ef37f28c86c29deb28.json b/data/output/00512126d65bf2a36801e4ef37f28c86c29deb28.json new file mode 100644 index 0000000..62e7977 --- /dev/null +++ b/data/output/00512126d65bf2a36801e4ef37f28c86c29deb28.json @@ -0,0 +1,356 @@ +{ + "file_name": "00512126d65bf2a36801e4ef37f28c86c29deb28.txt", + "total_words": 877, + "top_n_words": [ + { + "word": "the", + "count": 43 + }, + { + "word": "a", + "count": 22 + }, + { + "word": "and", + "count": 21 + }, + { + "word": "he", + "count": 20 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "trier", + "count": 18 + }, + { + "word": "von", + "count": 18 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "for", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "maybe he's cruel and vicious.", + "length": 29 + }, + { + "text": "Tell us below in the SoundOff box .", + "length": 35 + }, + { + "text": "\"You really, really don't like women.", + "length": 37 + }, + { + "text": "\"I had heard stories about him as a director ...", + "length": 48 + }, + { + "text": "Do you think that Lars von Trier is a woman-hater?", + "length": 50 + }, + { + "text": "\"If I didn't follow my instinct, then I can't work.", + "length": 51 + }, + { + "text": "In pictures: The wierd world of Lars von Trier » .", + "length": 51 + }, + { + "text": "Von Trier has a reputation for being tough on his actors.", + "length": 57 + }, + { + "text": "\"The good side is that you can sometimes achieve something creatively.", + "length": 70 + }, + { + "text": "But, of course, it always also allows some of these negative thoughts in.", + "length": 
73 + }, + { + "text": "\" \"Lars von Trier, we get it,\" wrote film critic Wendy Ide in UK paper The Times.", + "length": 81 + }, + { + "text": "\" Watch Lars von Trier talking to CNN's The Screening Room about \"Antichrist\" » .", + "length": 83 + }, + { + "text": "\" \"I was scared,\" admitted Gainsbourg who won Best Actress at Cannes for her performance.", + "length": 89 + }, + { + "text": "\" But she now describes him as her \"guide\" and \"the greatest director I've ever worked with.", + "length": 92 + }, + { + "text": "Von Trier claimed each morning she would say \"Mr von Trier, I despise you,\" and spit at him.", + "length": 92 + }, + { + "text": "It is rumored Bjork became so unhinged filming \"Dancer in the Dark\" she ate her own cardigan.", + "length": 93 + }, + { + "text": "Each visit to Cannes involves a five-day road trip from Denmark to the French Riviera by camper van.", + "length": 100 + }, + { + "text": "Danish auteur Lars von Trier has been making films that shock, provoke and impress for over 40 years.", + "length": 101 + }, + { + "text": "His friend and long-time collaborator, actor Stellan Skarsgard describes von Trier as \"not uncomplex.", + "length": 101 + }, + { + "text": "But it was the level of pornographic sex and visceral brutality that outraged some and astonished many.", + "length": 103 + }, + { + "text": "The director says that he shot the film as a form of therapy after recovering from a serious mental illness.", + "length": 108 + }, + { + "text": "He says he finds it difficult to know how to satisfy the needs of others with his films and so works only for himself.", + "length": 118 + }, + { + "text": "\"I think that if you are, shall we say, sensitive, then there is a good side and a bad side about it,\" said von Trier.", + "length": 118 + }, + { + "text": "Indeed, a few years ago, it was questionable whether von Trier, who is famously multi-phobic, would be able to make another film.", + "length": 129 + }, + { + "text": "\" Fueled 
by his unconventional approach and upbringing, the mythology surrounding von Trier looms large over everything he touches.", + "length": 131 + }, + { + "text": "But, von Trier says, he has always taken a deeply personal approach to the experimental, often dark and challenging works that he creates.", + "length": 138 + }, + { + "text": "\"I feel very strongly for satisfying, maybe not my own needs, but my own idea of the film and the images that come from within,\" he told CNN.", + "length": 141 + }, + { + "text": "Brought up in Copenhagen by bohemian parents who were committed nudists, he suffers from crippling bouts of agoraphobia; and, most famously, a fear of flying.", + "length": 158 + }, + { + "text": "In the winter of 2006, he fell victim to depression and checked into hospital, the aftermath of which left him \"like a blank sheet of paper,\" he told Danish paper Politiken at the time.", + "length": 185 + }, + { + "text": "Filmmakers are expected to give audiences a hard time at Cannes and the two-hander starring Willem Dafoe and Charlotte Gainsbourg as a couple grieving the loss of a child is no exception.", + "length": 187 + }, + { + "text": "\" Misogyny couldn't be further from the truth, according to Von Trier, who says he sees himself up there on the screen: \"I mostly see myself as the female character,\" the 53-year-old director told CNN in Cannes.", + "length": 211 + }, + { + "text": "LONDON, England (CNN) -- When Danish auteur Lars von Trier presented his gothic thriller, \"Antichrist\" at Cannes Film Festival last month, it was greeted with cat-calls, jeers and, at times, disbelieving laughter.", + "length": 213 + }, + { + "text": "Von Trier was labeled a woman-hater for the wince-inducingly horrific final scene in which female lead Charlotte Gainsbourg takes a pair of rusty scissors to her genitals and performs a DIY clitoridectomy right to camera.", + "length": 221 + }, + { + "text": "Today, if not fully recovered -- the most terrifying thing he 
can think of is still \"myself\" -- he is able to function once more and is receiving cognitive behavioral therapy to help him face up to his psychological issues.", + "length": 223 + }, + { + "text": "An Ecumenical Jury that normally hands out a prize at Cannes celebrating spiritual values felt moved to award \"Antichrist\" an \"anti-prize\" for being \"the most misogynist movie from the self-proclaimed biggest director in the world.", + "length": 231 + }, + { + "text": "Despite, or perhaps because of, what he describes as his \"sensitive\" nature, von Trier is one of today's great contemporary European auteurs, considered responsible for spearheading a revival in the fortunes of Scandinavian filmmaking.", + "length": 235 + }, + { + "text": "He has been nominated for the top prize at Cannes, the Palme D'Or, a staggering eight times, winning once in 2000 for the harrowing operatic tragedy, \"Dancer in the Dark,\" starring Icelandic musician, Bjork, who also took home the Best Actress prize that year.", + "length": 260 + }, + { + "text": "He has an undeniable egotistical streak: this year at Cannes, he declared, \"I am the best filmmaker in the world,\" and in 1991, when displeased that Cannes jury president Roman Polanski had only awarded \"Europa\" the runner-up Grand Prix prize, he called him a \"dwarf.", + "length": 267 + }, + { + "text": "\" He also seems to actively court controversy: 1998 Palme D'Or contender \"Dogme #2: The Idiots\" grabbed headlines for being the first commercial film to show non-simulated sex on screen, and for von Trier's typically eccentric claim that the best way to prepare actors for sex scenes is to direct in the nude.", + "length": 309 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5980491042137146 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:03.546382879Z", + "first_section_created": "2025-12-23T09:35:03.546754994Z", + "last_section_published": 
"2025-12-23T09:35:03.547265215Z", + "all_results_received": "2025-12-23T09:35:03.651785556Z", + "output_generated": "2025-12-23T09:35:03.651978864Z", + "total_processing_time_ms": 105, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 104, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:03.546754994Z", + "publish_time": "2025-12-23T09:35:03.547104708Z", + "first_worker_start": "2025-12-23T09:35:03.547697932Z", + "last_worker_end": "2025-12-23T09:35:03.650436Z", + "total_journey_time_ms": 103, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:03.54765513Z", + "start_time": "2025-12-23T09:35:03.547721433Z", + "end_time": "2025-12-23T09:35:03.547797136Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:03.547954Z", + "start_time": "2025-12-23T09:35:03.548182Z", + "end_time": "2025-12-23T09:35:03.650436Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 102 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:03.547662131Z", + "start_time": "2025-12-23T09:35:03.547736734Z", + "end_time": "2025-12-23T09:35:03.547852238Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:03.547632929Z", + "start_time": "2025-12-23T09:35:03.547697932Z", + "end_time": "2025-12-23T09:35:03.547744034Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:03.547192212Z", + "publish_time": "2025-12-23T09:35:03.547265215Z", + "first_worker_start": "2025-12-23T09:35:03.547739934Z", + "last_worker_end": "2025-12-23T09:35:03.615619Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": 
"2025-12-23T09:35:03.547814037Z", + "start_time": "2025-12-23T09:35:03.547838038Z", + "end_time": "2025-12-23T09:35:03.547842438Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:03.548457Z", + "start_time": "2025-12-23T09:35:03.548649Z", + "end_time": "2025-12-23T09:35:03.615619Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:03.547812937Z", + "start_time": "2025-12-23T09:35:03.547849638Z", + "end_time": "2025-12-23T09:35:03.547855838Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:03.547671231Z", + "start_time": "2025-12-23T09:35:03.547739934Z", + "end_time": "2025-12-23T09:35:03.547743834Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 168, + "min_processing_ms": 66, + "max_processing_ms": 102, + "avg_processing_ms": 84, + "median_processing_ms": 102, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2526, + "slowest_section_id": 0, + "slowest_section_time_ms": 103 + } +} diff --git a/data/output/005165822b715d3b8a4e8a26e098eb2bd0824e54.json b/data/output/005165822b715d3b8a4e8a26e098eb2bd0824e54.json new file mode 100644 index 0000000..1ff9a3d --- /dev/null +++ b/data/output/005165822b715d3b8a4e8a26e098eb2bd0824e54.json @@ -0,0 +1,400 @@ +{ + "file_name": "005165822b715d3b8a4e8a26e098eb2bd0824e54.txt", + "total_words": 969, + "top_n_words": [ + { + "word": "the", + "count": 56 + }, + { + "word": "and", + "count": 21 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "in", + "count": 19 + }, + { + "word": "of", + "count": 19 + }, + { + "word": "e", + "count": 14 + }, + { + "word": "as", + "count": 13 + }, + { + "word": "car", + "count": 13 + }, + { + "word": "cigarette", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "One of 1.", + "length": 9 + }, + { + "text": "5m Americans and 1.", + "length": 19 + }, + { + "text": "5m people in the UK.", + "length": 20 + }, + { + "text": "They are used by more than 3.", + "length": 29 + }, + { + "text": "Astonishing damage: The £29.", + "length": 29 + }, + { + "text": "'I can't believe what happened.", + "length": 31 + }, + { + "text": "The whole situation has scared me.", + "length": 34 + }, + { + "text": "When I told my partner she was shocked.", + "length": 39 + }, + { + "text": "Even the front visors are burnt through.", + "length": 40 + }, + { + "text": "I want people to realise they can be dangerous.", + "length": 47 + }, + { + "text": "chemicals that make some as harmful as normal tobacco.", + "length": 54 + }, + { + "text": "What if it was in the house charging and had gone off inside?", + "length": 61 + }, + { + "text": "The product is excellent, but the thought of it blowing up is terrifying.", + "length": 73 + }, + { + "text": "99 Vaporiz Tank is one of the more expensive 
e-cigarettes on the market .", + "length": 73 + }, + { + "text": "'It is supposed to be the best in the market so what are the cheaper ones like?", + "length": 79 + }, + { + "text": "Father of three Mr Thomas, from Maesycwmmer, South Wales, says he had left the £29.", + "length": 84 + }, + { + "text": "'I know a lot of people who smoke and I've told them about the electronic cigarette.", + "length": 84 + }, + { + "text": "99 device charging overnight in the 12V power socket of his company-owned Skoda Superb.", + "length": 87 + }, + { + "text": "He started smoking e-cigarettes four months ago in an attempt to kick his 40-a-day habit.", + "length": 89 + }, + { + "text": "'The cigarette had been charging in the front point and it's as if it went off like a firework.", + "length": 95 + }, + { + "text": "'I got up about an hour later for work and when I went out to the car I found everything burnt out.", + "length": 99 + }, + { + "text": "Ruined: The windows of the car were completely blackened by the explosion which happened overnight .", + "length": 100 + }, + { + "text": "Three-year-old Khonor Barlow was burnt when his mother's e-cigarette exploded as they drove in Utah .", + "length": 101 + }, + { + "text": "A motorist said today that his car was wrecked when his electronic cigarette exploded while charging.", + "length": 101 + }, + { + "text": "Burnt out: Chris Thomas says his car seat was destroyed when his e-cigarette exploded while charging up .", + "length": 105 + }, + { + "text": "The father of one, 37, from Maesycwmmer in south Wales, with the e-cigarette he says set fire to his car .", + "length": 106 + }, + { + "text": "He added: 'I used to be a heavy smoker but I've not touched a cigarette for four months since I tried these.", + "length": 108 + }, + { + "text": "Big tobacco has jumped en masse into the e-cigarette market and all the major cigarette makers now make them.", + "length": 109 + }, + { + "text": "E-cigarettes are currently used by millions of 
people around the world as a replacement for real cigarettes .", + "length": 109 + }, + { + "text": "Laws vary widely concerning their use and sale, and are the subject of pending legislation and ongoing debate.", + "length": 110 + }, + { + "text": "A three-year-old boy in America suffered burns when his mother's e-cigarette exploded while charging last week.", + "length": 111 + }, + { + "text": "' As e-cigarettes have grown in popularity there have been an increasing number of incidents concerning their safety.", + "length": 117 + }, + { + "text": "'God knows what would have happened then, or if it had gone off when we were driving and I've got children inside the car.", + "length": 122 + }, + { + "text": "The firm's website warns users not to leave the cigarettes unattended as they charge up, which can take around four hours.", + "length": 122 + }, + { + "text": "They are as effective as nicotine patches for helping smokers to quit, according to a recent University of Auckland study.", + "length": 122 + }, + { + "text": "3m Britons who have taken up the devices in recent years, Mr Thomas chose a Vapouriz Tank which is topped up using a liquid.", + "length": 124 + }, + { + "text": "It uses a heating element that vaporises a liquid solution and some release nicotine, while others merely release flavoured vapour.", + "length": 131 + }, + { + "text": "'They are meant to be the saviour but when you see the see the state of the car it makes you wonder what chemicals are inside them.", + "length": 131 + }, + { + "text": "Flames destroyed the rear seat and a child seat, scorched the front headrests and visors and left all the windows badly smoke blackened.", + "length": 136 + }, + { + "text": "As they've grown in popularity there have been several reports of e-cigarettes exploding due to the lithium-ion batteries inside them becoming overheated .", + "length": 155 + }, + { + "text": "' The spokesman said: 'We do warn users not to leave the cigarettes unattended as they 
charge up, so this does not mean you should leave them in your car overnight.", + "length": 164 + }, + { + "text": "In theory they can be used anywhere: planes, hospitals or restaurants, but some companies are banning them on varying grounds, perhaps because they bother other people.", + "length": 168 + }, + { + "text": "Mr Thomas, who said he'd tried two cheaper alternatives before but they both blew up a USB drive, said he was now planning to give up the tobacco alternative altogether.", + "length": 169 + }, + { + "text": "' Mr Thomas, from Maesycwmmer, South Wales, has not been told how much it will cost to repair the damage to the year-old estate car but the bill could run into thousands.", + "length": 170 + }, + { + "text": "The vehicle engineer said: 'The car alarm went off in the early hours of the morning so I went out and saw there was no-one around and clicked it off and went back to bed.", + "length": 171 + }, + { + "text": "Chris Thomas, 37, said the e-cigarette shot out of the 12V power socket 'like a firework' and landed on the back seat of his Skoda company car, setting setting fire to the upholstery.", + "length": 183 + }, + { + "text": "An electronic cigarette (or e-cigarette), personal vaporiser (PV), or electronic nicotine delivery system (ENDS) is an electronic inhaler meant to simulate and substitute for tobacco smoking.", + "length": 191 + }, + { + "text": "Khonor Barlow was burnt after his mother's White Rhino device bounced off her car's ceiling and onto the boy as he sat in a car seat in the back as they drove in their hometown of Mount Pleasant, Utah.", + "length": 201 + }, + { + "text": "'Mr Thomas, having used three different brands of e-cigarettes over a four-month period, has allegedly  had each one ‘blow up’ – this leads me to think that he may be using wrong or unsuitable chargers.", + "length": 209 + }, + { + "text": "' A spokesman for Vapouriz, which offers advice on charging the devices on its website, said: 'Chris Thomas has not 
contacted us with regards to any complaint, and actually purchased ‘e-liquid’, which in its diluted form is not flammable, from us only four days ago.", + "length": 270 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7434095144271851 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:04.048054338Z", + "first_section_created": "2025-12-23T09:35:04.048424353Z", + "last_section_published": "2025-12-23T09:35:04.048910973Z", + "all_results_received": "2025-12-23T09:35:04.215512134Z", + "output_generated": "2025-12-23T09:35:04.215758244Z", + "total_processing_time_ms": 167, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 166, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:04.048424353Z", + "publish_time": "2025-12-23T09:35:04.048728265Z", + "first_worker_start": "2025-12-23T09:35:04.049404993Z", + "last_worker_end": "2025-12-23T09:35:04.13741Z", + "total_journey_time_ms": 88, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:04.04934109Z", + "start_time": "2025-12-23T09:35:04.049404993Z", + "end_time": "2025-12-23T09:35:04.049558099Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:04.04976Z", + "start_time": "2025-12-23T09:35:04.04992Z", + "end_time": "2025-12-23T09:35:04.13741Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 87 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:04.049470196Z", + "start_time": "2025-12-23T09:35:04.049524698Z", + "end_time": "2025-12-23T09:35:04.049621902Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:04.049406593Z", + "start_time": "2025-12-23T09:35:04.049654403Z", + "end_time": "2025-12-23T09:35:04.049729106Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:04.048781468Z", + "publish_time": "2025-12-23T09:35:04.048910973Z", + "first_worker_start": "2025-12-23T09:35:04.049465995Z", + "last_worker_end": "2025-12-23T09:35:04.212008Z", + "total_journey_time_ms": 163, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:04.049397993Z", + "start_time": "2025-12-23T09:35:04.049657703Z", + "end_time": "2025-12-23T09:35:04.049682504Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:04.049757Z", + "start_time": "2025-12-23T09:35:04.049904Z", + "end_time": "2025-12-23T09:35:04.212008Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 162 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:04.049415093Z", + "start_time": "2025-12-23T09:35:04.049470496Z", + "end_time": "2025-12-23T09:35:04.049488296Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:04.049425394Z", + "start_time": "2025-12-23T09:35:04.049465995Z", + "end_time": "2025-12-23T09:35:04.049472296Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 249, + "min_processing_ms": 87, + "max_processing_ms": 162, + "avg_processing_ms": 124, + "median_processing_ms": 162, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2691, + "slowest_section_id": 1, + "slowest_section_time_ms": 163 + } +} diff --git a/data/output/0051a17112aa65327cab8475c6439f8854b77bb3.json b/data/output/0051a17112aa65327cab8475c6439f8854b77bb3.json new file mode 100644 index 0000000..d552573 --- /dev/null +++ b/data/output/0051a17112aa65327cab8475c6439f8854b77bb3.json @@ -0,0 +1,326 @@ +{ + "file_name": "0051a17112aa65327cab8475c6439f8854b77bb3.txt", + "total_words": 790, + "top_n_words": [ + { + "word": "the", + "count": 30 + }, + { + "word": "and", + "count": 21 + }, + { + "word": "a", + "count": 20 + }, + { + "word": "he", + "count": 19 + }, + { + "word": "to", + "count": 19 + }, + { + "word": "his", + "count": 16 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "berk", + "count": 15 + }, + { + "word": "that", + "count": 15 + }, + { + "word": "was", + "count": 15 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "Rep.", + "length": 4 + }, + { + "text": "Rep.", + "length": 4 + }, + { + "text": "On Monday, U.", + "length": 13 + }, + { + "text": "There is no U.", + "length": 14 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Mike Fitzpatrick (R-Bucks) asked the U.", + "length": 39 + }, + { + "text": "Mike Fitzpatrick (R-Bucks) asked the U.", + "length": 39 + }, + { + "text": "'Why 
don’t you just admit you’re a phony?", + "length": 45 + }, + { + "text": "'He was wearing awards that I earned and he didn't.", + "length": 51 + }, + { + "text": "com that fake soldiers are becoming increasingly common.", + "length": 56 + }, + { + "text": "'He's impersonating in the uniform people died for,' Berk added.", + "length": 64 + }, + { + "text": "Army Ranger with three CIBs named Yetman listed on any official record.", + "length": 71 + }, + { + "text": "When the man denied lying about his military service, Berk lost his cool.", + "length": 73 + }, + { + "text": "Anthony Anderson, a vet who runs the Guardian of Valor website told Philly.", + "length": 75 + }, + { + "text": "'Here it is, stolen valor at its finest,' he shouted, as Yetman began to walk away.", + "length": 83 + }, + { + "text": "Attorney in Philadelphia to determine if there was evidence of a federal crime against Sean Yetman .", + "length": 100 + }, + { + "text": "’ It was unknown if the uniformed man received any special treatment from any store at the mall, but on Monday, U.", + "length": 116 + }, + { + "text": "Using his cell phone to record their conversation, Berk approached the man and quizzed him about his experiences as a soldier.", + "length": 126 + }, + { + "text": "Most are seeking attention and recognition, while others use the uniform to receive exclusive military discounts at stores and restaurants.", + "length": 139 + }, + { + "text": "Berk’s video of the confrontation was uploaded to the Stolen Valor YouTube account, where it has since been viewed over two million times.", + "length": 140 + }, + { + "text": "Yetman claimed he had 'literally just got home from Fort Lewis' and was shopping with his Sergeant Major - who he offered to introduce Berk to.", + "length": 143 + }, + { + "text": "He also said his grandfather had served in World War 2 and it was ‘just wrong’ that this man was tainting that legacy by wearing the uniform.", + "length": 145 + }, + { + "text": "Speaking to 
Fox on Tuesday, Berk said that while the man had initially provided some answers that could have been correct, his story quickly unraveled .", + "length": 152 + }, + { + "text": "When asked where he had received his CIBs, the man first answered that he got all three in Afghanistan - an impossibility since only one is given per campaign.", + "length": 159 + }, + { + "text": "Attorney in Philadelphia to determine if there was evidence of a federal crime against 'Yetman' Berk has no doubts that the man he confronted was not a ranger.", + "length": 159 + }, + { + "text": "Along with an American flag patch in the wrong spot, Berk noticed that Yetman was wearing three Combat Infantryman Badges (CIBs) on his shoulder – an extremely rare honor.", + "length": 173 + }, + { + "text": "The pudgy man in uniform, who identified himself in the video as Sean Yetman, claimed he had 'literally just got home from Fort Lewis' and was shopping with his Sergeant Major .", + "length": 178 + }, + { + "text": "' Ryan Berk, a Afghanistan vet and Purple Heart recipient, was shopping at the mall with his girlfriend and her son when he spotted the 'fake' officer and decided to approach him .", + "length": 180 + }, + { + "text": "The account belongs to Guardian of Valor, a network of active and veteran soldiers who seek to expose people who falsely claim military service and/or claim unauthorized medals or tabs.", + "length": 185 + }, + { + "text": "‘I noticed his combat infantryman's badge, he had two stars above it, which would indicate that he served in three different wars which is almost physically impossible for his age,’ said Berk.", + "length": 196 + }, + { + "text": "The conversation got more heated as Berk then called the man, who wore a name ribbon identifying himself as ‘Yetman' and who had introduced himself as 'Sean', a phony and accused him of impersonating a soldier.", + "length": 212 + }, + { + "text": "‘No one is going to question a guy in uniform, unless they already wear the 
uniform,’ Berk, who earned a Purple Heart after he was wounded by shrapnel in 2010 while fighting in Afghanistan, told The Morning Call.", + "length": 216 + }, + { + "text": "Ryan Berk, a Purple Heart recipient, spotted the man - who was dressed in camouflage fatigues and who identified himself as Sean Yetman - at the Oxford Valley Mall and was immediately suspicious of his mismatched uniform.", + "length": 221 + }, + { + "text": "Speaking to Fox on Tuesday, Berk said that while the man had initially provided some answers that could have been correct, his story quickly unraveled when he claimed to have received his three CIBs from service in Afghanistan.", + "length": 227 + }, + { + "text": "'I served in Afghanistan and have several friend who were seriously wounded and a couple who were killed in action who wore that same uniform and to see someone try and claim that type of sacrifice really irked me,’ said Berk.", + "length": 228 + }, + { + "text": "Under the Stolen Valor Act of 2013, it is illegal for an individual to fraudulently portray him or herself as a recipient of any of several specified military decorations or medals with the intent to obtain money, property or other ‘tangible benefit.", + "length": 252 + }, + { + "text": "An Afghanistan veteran confronted a man he believed to be a ‘fake’ solider at a Pennsylvania shopping mall on Black Friday and accused him of being a phony after he watched him parade around wearing military honors that he couldn't possibly have been awarded.", + "length": 264 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.800032377243042 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:04.549472887Z", + "first_section_created": "2025-12-23T09:35:04.550144214Z", + "last_section_published": "2025-12-23T09:35:04.550442326Z", + "all_results_received": "2025-12-23T09:35:04.665001876Z", + "output_generated": "2025-12-23T09:35:04.665269687Z", + "total_processing_time_ms": 115, + 
"file_splitting_time_ms": 0, + "workers_processing_time_ms": 114, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:04.550144214Z", + "publish_time": "2025-12-23T09:35:04.550442326Z", + "first_worker_start": "2025-12-23T09:35:04.551126154Z", + "last_worker_end": "2025-12-23T09:35:04.660228Z", + "total_journey_time_ms": 110, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:04.551054451Z", + "start_time": "2025-12-23T09:35:04.551126154Z", + "end_time": "2025-12-23T09:35:04.551219158Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:04.551408Z", + "start_time": "2025-12-23T09:35:04.551557Z", + "end_time": "2025-12-23T09:35:04.660228Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 108 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:04.551110354Z", + "start_time": "2025-12-23T09:35:04.551177056Z", + "end_time": "2025-12-23T09:35:04.551289661Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:04.551187257Z", + "start_time": "2025-12-23T09:35:04.55126166Z", + "end_time": "2025-12-23T09:35:04.551303861Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 108, + "min_processing_ms": 108, + "max_processing_ms": 108, + "avg_processing_ms": 108, + "median_processing_ms": 108, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + 
"topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4609, + "slowest_section_id": 0, + "slowest_section_time_ms": 110 + } +} diff --git a/data/output/0051fa4bfd4e93b54c943067b76b3ae89ba8a812.json b/data/output/0051fa4bfd4e93b54c943067b76b3ae89ba8a812.json new file mode 100644 index 0000000..bfdfa69 --- /dev/null +++ b/data/output/0051fa4bfd4e93b54c943067b76b3ae89ba8a812.json @@ -0,0 +1,246 @@ +{ + "file_name": "0051fa4bfd4e93b54c943067b76b3ae89ba8a812.txt", + "total_words": 359, + "top_n_words": [ + { + "word": "the", + "count": 27 + }, + { + "word": "his", + "count": 15 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "for", + "count": 10 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "as", + "count": 7 + }, + { + "word": "it", + "count": 7 + }, + { + "word": "of", + "count": 7 + }, + { + "word": "that", + "count": 7 + }, + { + "word": "he", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "...", + "length": 3 + }, + { + "text": "'Ewww!", + "length": 6 + }, + { + "text": "' he exclaims while disapprovingly shaking his head.", + "length": 52 + }, + { + "text": "'We'll use that for toilet paper,' he adds chuckling.", + "length": 53 + }, + { + "text": "At Christmas the saying goes 'be grateful for what you get.", + "length": 59 + }, + { + "text": "His family are heard laughing as they watch his outraged reaction.", + "length": 66 + }, + { + "text": "' But this child certainly wasn't impressed with the gift he received.", + "length": 
70 + }, + { + "text": "Unimpressed: 'Ewww,' he exclaims while disapprovingly shaking his head .", + "length": 72 + }, + { + "text": "Fisher, who filmed the incident, said that the jersey was gifted as a joke.", + "length": 75 + }, + { + "text": "Caught on camera: His family are heard laughing as they watch his outraged reaction .", + "length": 85 + }, + { + "text": "Unfortunately, the item of clothing was too small, and it was too late to send it back.", + "length": 87 + }, + { + "text": "Present time: The youngster is seen excitedly opening the gift and ripping the paper off .", + "length": 90 + }, + { + "text": "'That's my boy' the man says as a look of disdain continues to spread across his son's face.", + "length": 92 + }, + { + "text": "The little boy then gets up from the table and moves over to his father for a reassuring hug.", + "length": 93 + }, + { + "text": "Source of amusement: Fisher, who filmed the incident, said that the jersey was gifted as a joke .", + "length": 98 + }, + { + "text": "Dean Fisher filmed the moment his six-year-old relative unwrapped a jersey for his ice hockey team's rival club.", + "length": 112 + }, + { + "text": "So instead the family decided to 'have some fun' with the resident Flyers fan and capture his appalled reaction.", + "length": 112 + }, + { + "text": "His sister ordered the Sidney Crosby jersey for her son as a Christmas gift - Crosby is the captain of the Pittsburgh Penguins.", + "length": 127 + }, + { + "text": "Get ready for it: Dean Fisher filmed the moment his six-year-old relative received a jersey for his ice hockey team's rival club this Christmas .", + "length": 145 + }, + { + "text": "That's until he spots that it is a yellow and black Pittsburgh Penguins long-sleeved top instead of an orange and black one for the Philadelphia Flyers .", + "length": 153 + }, + { + "text": "Passing it on: His sister ordered the Sidney Crosby jersey for her son as a Christmas gift - Crosby is the captain of the Pittsburgh 
Penguins - but unfortunately it was too small .", + "length": 180 + }, + { + "text": "The youngster is seen excitedly opening the present until he spots that it is a yellow and black Pittsburgh Penguins long-sleeved top instead of an orange and black one for the Philadelphia Flyers.", + "length": 197 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.3786434531211853 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:05.051164847Z", + "first_section_created": "2025-12-23T09:35:05.051746971Z", + "last_section_published": "2025-12-23T09:35:05.051983481Z", + "all_results_received": "2025-12-23T09:35:05.21854584Z", + "output_generated": "2025-12-23T09:35:05.218809551Z", + "total_processing_time_ms": 167, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 166, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:05.051746971Z", + "publish_time": "2025-12-23T09:35:05.051983481Z", + "first_worker_start": "2025-12-23T09:35:05.052729811Z", + "last_worker_end": "2025-12-23T09:35:05.217093Z", + "total_journey_time_ms": 165, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:05.052750312Z", + "start_time": "2025-12-23T09:35:05.052815614Z", + "end_time": "2025-12-23T09:35:05.052862816Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:05.053049Z", + "start_time": "2025-12-23T09:35:05.053196Z", + "end_time": "2025-12-23T09:35:05.217093Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 163 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:05.05270181Z", + "start_time": "2025-12-23T09:35:05.052767612Z", + "end_time": "2025-12-23T09:35:05.052820415Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + 
"receive_time": "2025-12-23T09:35:05.052665208Z", + "start_time": "2025-12-23T09:35:05.052729811Z", + "end_time": "2025-12-23T09:35:05.052753812Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 163, + "min_processing_ms": 163, + "max_processing_ms": 163, + "avg_processing_ms": 163, + "median_processing_ms": 163, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2046, + "slowest_section_id": 0, + "slowest_section_time_ms": 165 + } +} diff --git a/data/output/00526b12fa75230581d000659b8e4f3efc186553.json b/data/output/00526b12fa75230581d000659b8e4f3efc186553.json new file mode 100644 index 0000000..ee89788 --- /dev/null +++ b/data/output/00526b12fa75230581d000659b8e4f3efc186553.json @@ -0,0 +1,270 @@ +{ + "file_name": "00526b12fa75230581d000659b8e4f3efc186553.txt", + "total_words": 597, + "top_n_words": [ + { + "word": "the", + "count": 25 + }, + { + "word": "to", + "count": 18 + }, + { + "word": "care", + "count": 16 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "that", + "count": 12 + 
}, + { + "word": "it", + "count": 11 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "are", + "count": 8 + }, + { + "word": "not", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "It stinks.", + "length": 10 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "It can be truly heartbreaking.", + "length": 30 + }, + { + "text": "The worst could be closed down.", + "length": 31 + }, + { + "text": "'One person told us, 'I find it a terrible place: it's a diabolical place.", + "length": 74 + }, + { + "text": "'We have to be clear that putting up with poor care is not what anybody is expected to do.", + "length": 90 + }, + { + "text": "'And let's not forget, it doesn't just affect the individual – it affects the whole family.", + "length": 93 + }, + { + "text": "I am sure that this will mean people can be confident in the judgments our inspections will make.", + "length": 97 + }, + { + "text": "Sometimes it is abuse – older people treated roughly, worrying bruises that have no explanation.", + "length": 98 + }, + { + "text": "Secretly filmed: Joan Maddison was seen being slapped by the carers that were supposed to look after her .", + "length": 106 + }, + { + "text": "She added: 'Week in, week out, our inspectors discover some truly awful care which should not be happening.", + "length": 107 + }, + { + "text": "Ignored: Great-grandmother Yvonne Grant, 98, begged to be taken to the toilet for up to two-and-a-half hours .", + "length": 110 + }, + { + "text": "'These examples of failing care and their impact just reinforce my determination to make sure we call time on poor care.", + "length": 120 + }, + { + "text": "'It can all add up to a thoroughly miserable and frightening experience for people often in the most vulnerable of circumstances.", + "length": 129 + }, + { + "text": "Teams of inspectors, including experts in dementia care, will grade them as outstanding, good, requires 
improvement or inadequate.", + "length": 130 + }, + { + "text": "Miss Sutcliffe added: 'Ratings characteristics are an important part of our new approach to inspecting and rating adult social care.", + "length": 132 + }, + { + "text": "Elsewhere, residents were being dressed in someone else's clothes that did not fit because care workers were too busy to find the right ones.", + "length": 141 + }, + { + "text": "Elderly people in care homes are being treated roughly and suffering 'worrying bruises with no explanation', according to the chief inspector.", + "length": 142 + }, + { + "text": "Andrea Sutcliffe, of the Care Quality Commission, said that 'some truly awful care' was being uncovered 'week in, week out' during inspections.", + "length": 143 + }, + { + "text": "This week the watchdog will begin inspecting all 25,000 care homes in England using a new system which will see them given Ofsted-style ratings.", + "length": 144 + }, + { + "text": "'Sometimes it is neglect – people living with dementia not supported to eat and drink so they end up with malnutrition or life-threatening dehydration.", + "length": 153 + }, + { + "text": "She revealed that her inspectors had discovered patients with dementia who had life-threatening dehydration and malnutrition because staff were not helping them.", + "length": 161 + }, + { + "text": "Sometimes, the worst part of the letters I read is the distress and guilt the family feel when they discover the service they trusted had betrayed the people they cared about.", + "length": 175 + }, + { + "text": "Miss Sutcliffe, chief inspector of adult social care at the commission, has told inspectors to apply the 'mum test' when evaluating homes: would they leave their own parents there?", + "length": 180 + }, + { + "text": "'They will allow our inspectors to really get under the skin of adult social care services so that providers know what we are expecting and how we will consistently rate their services.", + "length": 185 + }, + { + 
"text": "' The Care Quality Commission said that its new inspections system is designed to ensure inspectors are 'consistent when making judgments' and would 'help care providers understand' how ratings are being awarded.", + "length": 212 + }, + { + "text": "' ' She added: 'Too often we find services that need to change but the people using those services are putting up with awful care and say 'it's not perfect' or 'the staff are very good; you get the odd one but you can't help that'.", + "length": 231 + }, + { + "text": "Sometimes it is a shocking lack of respect for people's dignity – dressed in someone else's clothes that don't fit, men not having a regular shave because staff are too busy, no one taking the time to find out what makes you happy or just talk to you.", + "length": 253 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8599287271499634 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:05.552847707Z", + "first_section_created": "2025-12-23T09:35:05.553127418Z", + "last_section_published": "2025-12-23T09:35:05.553297025Z", + "all_results_received": "2025-12-23T09:35:05.617686338Z", + "output_generated": "2025-12-23T09:35:05.617869446Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:05.553127418Z", + "publish_time": "2025-12-23T09:35:05.553297025Z", + "first_worker_start": "2025-12-23T09:35:05.554032255Z", + "last_worker_end": "2025-12-23T09:35:05.61666Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:05.553987953Z", + "start_time": "2025-12-23T09:35:05.554059756Z", + "end_time": "2025-12-23T09:35:05.554114259Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + 
"receive_time": "2025-12-23T09:35:05.554194Z", + "start_time": "2025-12-23T09:35:05.554318Z", + "end_time": "2025-12-23T09:35:05.61666Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:05.553967653Z", + "start_time": "2025-12-23T09:35:05.554032255Z", + "end_time": "2025-12-23T09:35:05.554102358Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:05.554020555Z", + "start_time": "2025-12-23T09:35:05.554084457Z", + "end_time": "2025-12-23T09:35:05.554111758Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3526, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/0053278402828d9c471a1df7a93593a32f614a24.json 
b/data/output/0053278402828d9c471a1df7a93593a32f614a24.json new file mode 100644 index 0000000..e59fba8 --- /dev/null +++ b/data/output/0053278402828d9c471a1df7a93593a32f614a24.json @@ -0,0 +1,218 @@ +{ + "file_name": "0053278402828d9c471a1df7a93593a32f614a24.txt", + "total_words": 419, + "top_n_words": [ + { + "word": "the", + "count": 22 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "she", + "count": 15 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "that", + "count": 13 + }, + { + "word": "had", + "count": 12 + }, + { + "word": "to", + "count": 11 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "was", + "count": 8 + }, + { + "word": "and", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "’ He said that an ambulance arrived but the paramedics were unable to save her.", + "length": 81 + }, + { + "text": "Drowned: Emma Campbell, of Winchfield, Hampshire, was found in the deep end of the pool in Menorca .", + "length": 100 + }, + { + "text": "Miss Campbell’s mother, Sally-Anne Le Clerc, said that the couple had an appointment to view the church that morning.", + "length": 119 + }, + { + "text": "Mr Mudford added that Miss Campbell had not spoken of her intentions that morning as she was not a ‘morning person’.", + "length": 120 + }, + { + "text": "I jumped in and immediately pulled Emma to the surface and performed CPR to the best of my ability and screamed for help.", + "length": 121 + }, + { + "text": "He had gone for a jog that morning on July 3, having left his 29-year-old fiancée drinking a glass of orange juice at the pool-side.", + "length": 133 + }, + { + "text": "Emma Campbell, of Winchfield, Hampshire, was found in the pool's deep end on the Spanish island of Menorca by her fiancé George Mudford.", + "length": 137 + }, + { + "text": "She said: ‘They were looking at venues to be married and they had got an appointment, they had decided to look at the church at that time.", + "length": 140 + }, + { + "text": "In a 
statement read to the Basingstoke inquest, he said: ‘When I came home from a run I found my fiancée face down in the deep end of the pool.", + "length": 146 + }, + { + "text": "North Hampshire coroner Andrew Bradley said that post mortem examinations carried out in Menorca and Basingstoke gave a cause of death consistent with drowning.", + "length": 160 + }, + { + "text": "’ Ms Le Clerc told the hearing that her daughter had been born with a heart defect, pulmonary atresia, which she explained meant that she had only one pumping chamber.", + "length": 169 + }, + { + "text": "She said that she had undergone operations when she a baby and at three years of age, but had not suffered any long-term effects and was not on any medication for the condition.", + "length": 177 + }, + { + "text": "A bride-to-be drowned in the swimming pool of her family’s holiday home on the morning that she was going to view the church where she was to be married, an inquest heard today.", + "length": 179 + }, + { + "text": "’ Abroad: Miss Campbell had been at her family's holiday home on the Spanish island of Menorca (file picture) She added: ‘She was well able to swim, she knew the pool, she had been there from a very early age, she was a good swimmer.", + "length": 237 + }, + { + "text": "Recording a verdict of accidental death with a background of Miss Campbell’s heart condition, he said: ‘I have no doubt it’s an accidental death, had she not been under water, the nature of this event would not have happened, it’s that helpless situation.", + "length": 263 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.45176342129707336 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:06.054250356Z", + "first_section_created": "2025-12-23T09:35:06.054530167Z", + "last_section_published": "2025-12-23T09:35:06.054727575Z", + "all_results_received": "2025-12-23T09:35:06.123146952Z", + "output_generated": "2025-12-23T09:35:06.123282457Z", + 
"total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:06.054530167Z", + "publish_time": "2025-12-23T09:35:06.054727575Z", + "first_worker_start": "2025-12-23T09:35:06.055224395Z", + "last_worker_end": "2025-12-23T09:35:06.122119Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:06.055138892Z", + "start_time": "2025-12-23T09:35:06.055224395Z", + "end_time": "2025-12-23T09:35:06.055282097Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:06.05546Z", + "start_time": "2025-12-23T09:35:06.055598Z", + "end_time": "2025-12-23T09:35:06.122119Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:06.055243896Z", + "start_time": "2025-12-23T09:35:06.055308799Z", + "end_time": "2025-12-23T09:35:06.055358201Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:06.055222295Z", + "start_time": "2025-12-23T09:35:06.055280597Z", + "end_time": "2025-12-23T09:35:06.055310899Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2296, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/0053ab506d5829dfe97ce4177f43f27ae840ab89.json b/data/output/0053ab506d5829dfe97ce4177f43f27ae840ab89.json new file mode 100644 index 0000000..1c6117a --- /dev/null +++ b/data/output/0053ab506d5829dfe97ce4177f43f27ae840ab89.json @@ -0,0 +1,456 @@ +{ + "file_name": "0053ab506d5829dfe97ce4177f43f27ae840ab89.txt", + "total_words": 1155, + "top_n_words": [ + { + "word": "the", + "count": 63 + }, + { + "word": "to", + "count": 38 + }, + { + "word": "a", + "count": 33 + }, + { + "word": "of", + "count": 23 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "is", + "count": 18 + }, + { + "word": "lambie", + "count": 18 + }, + { + "word": "i", + "count": 14 + }, + { + "word": "s", + "count": 14 + }, + { + "word": "ebola", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "H.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "A.", + "length": 2 + }, + { + "text": "Thanks Mr Chair.", + "length": 16 + }, + { + "text": "LAMBIE: Thank you.", + "length": 18 + }, + { + "text": "I just want to know...", + "length": 22 + }, + { + "text": "'It's not my moral upbringing.", + "length": 30 + }, + { + "text": "Are your men and women protected?", + "length": 33 + }, + { + "text": "I'm Australian,' she told Sky News.", + "length": 35 + }, + { + "text": "I'm not sure 
what their contracts are.", + "length": 38 + }, + { + "text": "And what is your contingency plan to fight this?", + "length": 48 + }, + { + "text": "LAMBIE: I'm just asking if you have a contingency plan.", + "length": 55 + }, + { + "text": "Ebola has been a pet topic for Ms Lambie in recent weeks.", + "length": 57 + }, + { + "text": "Last Friday, Ms Lambie proposed the government set up 'M.", + "length": 57 + }, + { + "text": "Are they prepared to fight the Ebola alongside the military?", + "length": 60 + }, + { + "text": "GRIGGS: I'll have to take the exact numbers on notice please.", + "length": 61 + }, + { + "text": "'It's just not my moral upbringing and I'm just not comfortable.", + "length": 64 + }, + { + "text": "LAMBIE: And so are the civilian doctors and civilian nurses that...", + "length": 67 + }, + { + "text": "And that the length of time that it takes simply to get to Australia.", + "length": 69 + }, + { + "text": "Is there a contingency plan through the Defence Force to tackle the Ebola issue.", + "length": 80 + }, + { + "text": "GRIGGS: Senator, our men and women are protected as anyone else in the country is.", + "length": 82 + }, + { + "text": "I'm not sure the ratio between defence and civilian now within that medical corps...", + "length": 84 + }, + { + "text": "Our health system has its preparations ready for the virus appearing in the country.", + "length": 84 + }, + { + "text": "How big is the corps, the medical corps, within the armed forces, which is military?", + "length": 84 + }, + { + "text": "LAMBIE: I was just wondering if I could ask a supplementary on Ebola, if that's OK.", + "length": 84 + }, + { + "text": "' Opposed to the wearing of the burqa, the senator has spoken out against sharia law.", + "length": 85 + }, + { + "text": "style' mobile field hospitals to fight Ebola as part of a virus-fighting 'action plan'.", + "length": 87 + }, + { + "text": "So are they prepared - do they get to move out if they do not want to fight this if 
it hits?", + "length": 92 + }, + { + "text": "GRIGGS: Well, I don't believe there is any evidence to suggest that is a likely course of action.", + "length": 97 + }, + { + "text": "After admitting Muslims were also Australian, she said she would not feel comfortable at a mosque.", + "length": 98 + }, + { + "text": "One of the things, Senator, that we have playing to our advantage is our geographic (sic) isolation.", + "length": 100 + }, + { + "text": "And (with) the life cycle of the virus, it becomes very obvious relatively quickly how sick someone is.", + "length": 103 + }, + { + "text": "The response to that hypothetical situation would be the response to any outbreak of Ebola in Australia.", + "length": 104 + }, + { + "text": "'Senator, our men and women are as protected as anyone else in the country is,' Vice Admiral Griggs said.", + "length": 105 + }, + { + "text": "'I don't believe there is any evidence to suggest that is a likely course of action,' he told the hearing.", + "length": 106 + }, + { + "text": "Officials replied by saying there was no evidence we could come under attack from the hypothetical scourge.", + "length": 107 + }, + { + "text": "So I think the ability to inject someone into the country who is not showing symptoms would be quite difficult.", + "length": 111 + }, + { + "text": "Do you have any information to suggest Australia's enemies could use attack our country or military using the virus?", + "length": 116 + }, + { + "text": "Turning down the invitation, Senator Lambie said she wouldn't enter a mosque because she was Catholic and religious.", + "length": 116 + }, + { + "text": "She has previously said any Australian who supports sharia should not have the right to vote or get welfare payments.", + "length": 117 + }, + { + "text": "But Vice Admiral Griggs said there was no proof militants were capable of attacking Australia or its troops with Ebola.", + "length": 119 + }, + { + "text": "Because if you listen to one side of it, they can 
say this can take off 10,000 a week, it can hit people by 10,000 a week.", + "length": 122 + }, + { + "text": "'I'm certainly not going to go out there just because it's going to be a media blitz standing outside a mosque,' she said.", + "length": 122 + }, + { + "text": "For example with suicide agents who are infected by the disease or who have access to bodily fluids containing the disease.", + "length": 123 + }, + { + "text": "' 'For example with suicide agents who are infected by the disease or who have access to bodily fluids containing the disease.", + "length": 126 + }, + { + "text": "More than 4500 people have died in the three worst hit West African nations of Liberia, Guinea and Sierra Leone, WHO figures said.", + "length": 130 + }, + { + "text": "He said because of Australia's isolation, it would be 'quite difficult' for a person to enter the country without showing symptoms.", + "length": 131 + }, + { + "text": "The Ebola virus: The deadly ailment has killed 4500 people in West Africa, according to the latest World Health Organisation figures .", + "length": 134 + }, + { + "text": "Turning down the invitation, Senator Jacqui Lambie said she wouldn't enter a mosque (pictured) because she was Catholic and religious .", + "length": 135 + }, + { + "text": "Ms Lambie asked: 'Do you have any information to suggest that Australia's enemies could attack our country or military using the virus?", + "length": 135 + }, + { + "text": "' Ms Lambie, a former Army soldier, then pressed Vice-Admiral Griggs on whether the Defence Force had a contingency plan for the scenario.", + "length": 138 + }, + { + "text": "According to the World Health Organisation (WHO), around 10,000 people could die of the virus each week if it is not brought under control.", + "length": 139 + }, + { + "text": "Outspoken Palmer United Party senator Jacqui Lambie has rejected a Liberal MP's invitation to visit a mosque because she is Australian and a Catholic.", + "length": 150 + }, + { + 
"text": "Outspoken Palmer United Party senator Jacqui Lambie has rejected a Liberal MP's invitation to visit a mosque because she is Australian and a Catholic .", + "length": 151 + }, + { + "text": "Last week, Ms Lambie told Daily Mail Australia the 'If you don't love it, leave' slogan on this controversial singlet sold at Woolworths did not go far enough .", + "length": 160 + }, + { + "text": "Her comments follow the controversial senator quizzing defence force top brass over the threat Ebola-infected suicide terrorists could pose to Australia and its military.", + "length": 170 + }, + { + "text": "Ms Lambie asked a series of questions about the ADF's readiness for Ebola at a Senate Foreign Affairs, Defence and Trade Legislation Committee hearing on Wednesday morning .", + "length": 173 + }, + { + "text": "While there have been a number of scares at Australian hospitals in recent months - including the case of volunteer Red Cross nurse Sue Ellen Kovack - no Australian has reported an infection.", + "length": 191 + }, + { + "text": "Not at risk: Vice Admiral Ray Griggs said it would be 'quite difficult' for a person to enter the country without showing Ebola symptoms, particularly given the nation's geographical isolation .", + "length": 194 + }, + { + "text": "At a Senate Estimates hearing Wednesday morning, the Tasmanian senator asked Vice Chief of the Defence Force Ray Griggs if the nation's enemies could launch an attack with the deadly virus, Ebola.", + "length": 197 + }, + { + "text": "Liberal MP Craig Laundy invited Senator Lambie to his Western Sydney electorate on Saturday to visit one of the large number of mosques holding open days, saying she should familiarise herself with Islamic people.", + "length": 213 + }, + { + "text": "In recent months, Ms Lambie, elected to a six-year term in the Senate last year, has waded into a number of controversial issues including the debate surrounding the burqa and Australia's preparedness for the Ebola virus .", + 
"length": 222 + }, + { + "text": "Liberal MP Craig Laundy (pictured) invited Senator Lambie to his Western Sydney electorate on Saturday to visit one of the large number of mosques holding open days, saying she should familiarise herself with Islamic people .", + "length": 225 + }, + { + "text": "'I don't believe there is any evidence to suggest that is a likely course of action': Senator Jacqui Lambie (left) and Australian Defence Force Vice Chief Ray Griggs faced off at a senate estimates committee hearing on Wednesday morning .", + "length": 238 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7170371413230896 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:06.555503898Z", + "first_section_created": "2025-12-23T09:35:06.557056361Z", + "last_section_published": "2025-12-23T09:35:06.557487178Z", + "all_results_received": "2025-12-23T09:35:06.643752979Z", + "output_generated": "2025-12-23T09:35:06.643975188Z", + "total_processing_time_ms": 88, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 86, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:06.557056361Z", + "publish_time": "2025-12-23T09:35:06.557350173Z", + "first_worker_start": "2025-12-23T09:35:06.557735689Z", + "last_worker_end": "2025-12-23T09:35:06.642843Z", + "total_journey_time_ms": 85, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:06.557705887Z", + "start_time": "2025-12-23T09:35:06.55778139Z", + "end_time": "2025-12-23T09:35:06.557909996Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:06.558092Z", + "start_time": "2025-12-23T09:35:06.558204Z", + "end_time": "2025-12-23T09:35:06.642843Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 84 + }, + "topn": { + "worker_type": "topn", + "receive_time": 
"2025-12-23T09:35:06.557713488Z", + "start_time": "2025-12-23T09:35:06.55777709Z", + "end_time": "2025-12-23T09:35:06.557945397Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:06.557649585Z", + "start_time": "2025-12-23T09:35:06.557735689Z", + "end_time": "2025-12-23T09:35:06.557786591Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:06.557423476Z", + "publish_time": "2025-12-23T09:35:06.557487178Z", + "first_worker_start": "2025-12-23T09:35:06.557830092Z", + "last_worker_end": "2025-12-23T09:35:06.633946Z", + "total_journey_time_ms": 76, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:06.557903495Z", + "start_time": "2025-12-23T09:35:06.5580188Z", + "end_time": "2025-12-23T09:35:06.558055402Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:06.558092Z", + "start_time": "2025-12-23T09:35:06.558217Z", + "end_time": "2025-12-23T09:35:06.633946Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 75 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:06.557861394Z", + "start_time": "2025-12-23T09:35:06.557915396Z", + "end_time": "2025-12-23T09:35:06.557960198Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:06.557793491Z", + "start_time": "2025-12-23T09:35:06.557830092Z", + "end_time": "2025-12-23T09:35:06.557862094Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 159, + "min_processing_ms": 75, + "max_processing_ms": 84, + "avg_processing_ms": 79, + "median_processing_ms": 84, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3336, + "slowest_section_id": 0, + "slowest_section_time_ms": 85 + } +} diff --git a/data/output/0053c714b9717aaa2747a4751685277cbb169bc8.json b/data/output/0053c714b9717aaa2747a4751685277cbb169bc8.json new file mode 100644 index 0000000..2f9096b --- /dev/null +++ b/data/output/0053c714b9717aaa2747a4751685277cbb169bc8.json @@ -0,0 +1,254 @@ +{ + "file_name": "0053c714b9717aaa2747a4751685277cbb169bc8.txt", + "total_words": 475, + "top_n_words": [ + { + "word": "the", + "count": 51 + }, + { + "word": "of", + "count": 19 + }, + { + "word": "ball", + "count": 12 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "is", + "count": 11 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "opera", + "count": 9 + }, + { + "word": "as", + "count": 8 + }, + { + "word": "in", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Dancers crowd the dance floor during traditional Opera Ball .", + "length": 61 + }, + { + "text": "A debutante pulls on her long-sleeved gloves ahead of the 
ball .", + "length": 64 + }, + { + "text": "The young debutantes have been rehearsing their opening waltz for weeks .", + "length": 73 + }, + { + "text": "Taking part is still seen as a rite of passage for some young Austrian women .", + "length": 78 + }, + { + "text": "The opulence of the Vienna Opera House only adds to the glamour of the evening .", + "length": 80 + }, + { + "text": "186 debutantes and their partners dance the opening waltz at the Vienna Opera Ball .", + "length": 84 + }, + { + "text": "The Opera Ball is one of the most privileged events in the Austrian social calendar .", + "length": 85 + }, + { + "text": "The 18-24 year olds spend weeks rehearsing for the dance, which lasts just four minutes.", + "length": 88 + }, + { + "text": "As far as glamour goes, the Vienna Opera Ball is up there with the BAFTAs and The Met Ball.", + "length": 91 + }, + { + "text": "The young women are treated to a professional make-up artist backstage at the Opera House .", + "length": 91 + }, + { + "text": "The debutantes are led into the room by their partners - weeks of preparation have led to this moment .", + "length": 103 + }, + { + "text": "They include the Pharmacists’ ball, Lawyers’ ball, Coffeemakers ball - and even a weightlifters’ ball.", + "length": 108 + }, + { + "text": "Taking part is still seen as a rite of passage for some young Austrian women - it is their introduction into society.", + "length": 117 + }, + { + "text": "Beaming debutantes are put through their paces on the dance floor as the rest of the guests look on from their seats .", + "length": 118 + }, + { + "text": "A group of debutantes wait in the wings of the Vienna Opera House before they take to the floor for the opening waltz .", + "length": 119 + }, + { + "text": "The backstage area of the Opera House is a commotion of young women from early afternoon as the girls descend to prepare their outfits .", + "length": 136 + }, + { + "text": "To kick-start the festivities, 186 debutantes and 
their partners dance the opening waltz, under the watchful gaze of the older guests.", + "length": 137 + }, + { + "text": "While the women are required to wear a long white dress, they all have different embellishments, like corset-style lacing and sequinned-edging .", + "length": 144 + }, + { + "text": "Although the Opera Ball is the best known, with tickets starting at £220 and a box as much as £16,300, the city hosts more than 450 of them every year.", + "length": 154 + }, + { + "text": "But while the moment is fleeting, it is also hugely popular - hundreds of thousands of people tune in to watch the dance played out from living rooms across the globe.", + "length": 167 + }, + { + "text": "In the UK, these traditions have been almost entirely forgotten, but Vienna's long history with balls means that it still a large part of becoming an adult in Austria.", + "length": 167 + }, + { + "text": "Dressed in the traditional white debutante gowns, with diamante tiaras and long-sleeved white gloves, the image of the young women is a throwback to early 20th century Europe.", + "length": 175 + }, + { + "text": "The ball is one of the most exclusive events in the Viennese social calendar and every year, Austria's most important men and women descend on the Wiener Staatsoper in Vienna, to rub shoulders with stars like Kim Kardashian and Hilary Swank.", + "length": 241 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.41011756658554077 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:07.058267758Z", + "first_section_created": "2025-12-23T09:35:07.058657374Z", + "last_section_published": "2025-12-23T09:35:07.058863283Z", + "all_results_received": "2025-12-23T09:35:07.11984919Z", + "output_generated": "2025-12-23T09:35:07.119997796Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + 
"creation_time": "2025-12-23T09:35:07.058657374Z", + "publish_time": "2025-12-23T09:35:07.058863283Z", + "first_worker_start": "2025-12-23T09:35:07.059342502Z", + "last_worker_end": "2025-12-23T09:35:07.118945Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:07.059371503Z", + "start_time": "2025-12-23T09:35:07.059435306Z", + "end_time": "2025-12-23T09:35:07.059503409Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:07.059594Z", + "start_time": "2025-12-23T09:35:07.059759Z", + "end_time": "2025-12-23T09:35:07.118945Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:07.059328302Z", + "start_time": "2025-12-23T09:35:07.059383804Z", + "end_time": "2025-12-23T09:35:07.059454607Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:07.059274899Z", + "start_time": "2025-12-23T09:35:07.059342502Z", + "end_time": "2025-12-23T09:35:07.059374104Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2727, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/0053f8792e0efbb97cfb44c872f60b644e255b47.json b/data/output/0053f8792e0efbb97cfb44c872f60b644e255b47.json new file mode 100644 index 0000000..f424f1e --- /dev/null +++ b/data/output/0053f8792e0efbb97cfb44c872f60b644e255b47.json @@ -0,0 +1,306 @@ +{ + "file_name": "0053f8792e0efbb97cfb44c872f60b644e255b47.txt", + "total_words": 668, + "top_n_words": [ + { + "word": "the", + "count": 43 + }, + { + "word": "a", + "count": 25 + }, + { + "word": "he", + "count": 22 + }, + { + "word": "and", + "count": 16 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "zbudowskyj", + "count": 15 + }, + { + "word": "at", + "count": 14 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "his", + "count": 13 + }, + { + "word": "in", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Harriet Arkell .", + "length": 16 + }, + { + "text": "10:42 EST, 21 January 2014 .", + "length": 28 + }, + { + "text": "10:52 EST, 21 January 2014 .", + "length": 28 + }, + { + "text": "Prosecutor Brian Simpson said: .", + "length": 32 + }, + { + "text": "turned up after he’d been drinking.", + "length": 37 + }, + { + "text": "'Zbudowskyj was booked to work as a stripper for a ladies night but he .", + "length": 72 + }, + { + "text": "' The court heard he then went on to attack the compere and a 
member of the audience.", + "length": 85 + }, + { + "text": "' Fabio later appeared at the club for free to make up for the women’s disappointment.", + "length": 88 + }, + { + "text": "Zbudowskyj smiled in court today as the events of the night in January last year were related.", + "length": 94 + }, + { + "text": "'He bit Mr Morgan on the chest, and he was only released when he managed to twist his body away.", + "length": 96 + }, + { + "text": "Erotic act: Zbudowskyj was performing in a fireman's uniform but it failed to excite his female audience .", + "length": 106 + }, + { + "text": "'He used the gents toilet as a changing room and the compere, Deborah Jones, saw him pull his trousers down.", + "length": 108 + }, + { + "text": "Leon Zbudowskyj, 30, lashed out when he was heckled by the female audience at the Baltic Inn, near Newport .", + "length": 108 + }, + { + "text": "Zbudowskyj, left, was standing in at the last minute for regular Baltic Inn stripper, Fabio, pictured right .", + "length": 109 + }, + { + "text": "Venue: Five police cars arrived to break up the scuffle at the nightspot in the village of Pontyates, pictured .", + "length": 112 + }, + { + "text": "' Zbudowskyj had admitted three charges of common assault and one of damaging the glasses at an earlier hearing.", + "length": 112 + }, + { + "text": "Popular: The Baltic Inn hosts a regular ladies' night featuring stripper Fabio, with tickets costing £10 a head .", + "length": 114 + }, + { + "text": "Zbudowskyj denied assault and claimed he himself had been attacked at the bar in Pontyates near Llanelli, West Wales.", + "length": 117 + }, + { + "text": "'There was a struggle, and Zbudowskyj threw a beer glass at him, grabbed him by the face, and tried to gouge his eyes.", + "length": 118 + }, + { + "text": "Psychiatrist Dr Stephen Attwood told the court that Zbudowskyj was suffering from a mental disorder which 'can be treated'.", + "length": 123 + }, + { + "text": "'On stage he started spitting 
cream at the members of the audience - the audience began to boo and throw serviettes at him.", + "length": 123 + }, + { + "text": "'It’s only a matter of luck he has not caused serious injury to members of the public through his uncontrollable outbursts.", + "length": 125 + }, + { + "text": "Five police cars were sent to break up the scuffle, and Zbudowskyj was arrested and driven away still wearing his fireman’s outfit.", + "length": 133 + }, + { + "text": "Today the court heard he had 11 previous offences of violence and had once smashed up a flat before running naked through a town centre.", + "length": 136 + }, + { + "text": "Mr Simpson said: 'Mrs Jones described Zbudowskyj as being liked a caged animal, and he lunged at her, causing her £154 glasses to break.", + "length": 137 + }, + { + "text": "'A woman in the audience, Bethan Lewis, went to the gents toilet after seeing the fuss but Zbudowskyj grabbed her by the hair, pulling clumps of it out.", + "length": 152 + }, + { + "text": "The prosecutor said: 'He appeared agitated and aggressive - he still had his trousers down when he lunged at Mr Morgan, punching him to the side of the head.", + "length": 157 + }, + { + "text": "First-timer Leon Zbudowskyj, 30, was a last minute replacement for a regular stripper called Fabio at the £10-a-head, ladies-only night at the Baltic Inn, near Llanelli.", + "length": 170 + }, + { + "text": "But he failed to excite the crowd, and when more than 200 disappointed women started booing the 'scrawny' striptease artist, pelting him with paper napkins, he flew into a rage.", + "length": 177 + }, + { + "text": "Swansea Crown Court heard how Zbudowskyj, of Caerleon, Newport, had a few drinks to calm his nerves before changing into his fireman’s outfit in the men's lavatory at the pub.", + "length": 177 + }, + { + "text": "A stand-in male stripper lost his temper when he was heckled by his female audience, lashing out and pulling one woman's hair out in clumps - while his trousers 
were around his ankles.", + "length": 184 + }, + { + "text": "' The court heard Mrs Jones described the novice stripper, who was taken offstage by manager David Morgan and told he was no longer welcome, as like 'a rabbit caught in a car's headlights'.", + "length": 189 + }, + { + "text": "Judge Keith Thomas, who made Zbudowskyj the subject of a mental health order committing him to a hospital without time limit, said: 'When he loses control of himself he behaves in an uncontrollable fashion.", + "length": 206 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5667584538459778 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:07.559617271Z", + "first_section_created": "2025-12-23T09:35:07.561566751Z", + "last_section_published": "2025-12-23T09:35:07.561815661Z", + "all_results_received": "2025-12-23T09:35:07.63428264Z", + "output_generated": "2025-12-23T09:35:07.634806962Z", + "total_processing_time_ms": 75, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 72, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:07.561566751Z", + "publish_time": "2025-12-23T09:35:07.561815661Z", + "first_worker_start": "2025-12-23T09:35:07.562225078Z", + "last_worker_end": "2025-12-23T09:35:07.63336Z", + "total_journey_time_ms": 71, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:07.562244379Z", + "start_time": "2025-12-23T09:35:07.562307581Z", + "end_time": "2025-12-23T09:35:07.562413286Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:07.562448Z", + "start_time": "2025-12-23T09:35:07.562581Z", + "end_time": "2025-12-23T09:35:07.63336Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 70 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:07.562172176Z", + 
"start_time": "2025-12-23T09:35:07.562225078Z", + "end_time": "2025-12-23T09:35:07.562296381Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:07.562175276Z", + "start_time": "2025-12-23T09:35:07.562236978Z", + "end_time": "2025-12-23T09:35:07.56227168Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 70, + "min_processing_ms": 70, + "max_processing_ms": 70, + "avg_processing_ms": 70, + "median_processing_ms": 70, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3830, + "slowest_section_id": 0, + "slowest_section_time_ms": 71 + } +} diff --git a/data/output/0054cdf9dbdb21a739395b12d76cafcbce1ccd3c.json b/data/output/0054cdf9dbdb21a739395b12d76cafcbce1ccd3c.json new file mode 100644 index 0000000..ec2f056 --- /dev/null +++ b/data/output/0054cdf9dbdb21a739395b12d76cafcbce1ccd3c.json @@ -0,0 +1,378 @@ +{ + "file_name": "0054cdf9dbdb21a739395b12d76cafcbce1ccd3c.txt", + "total_words": 791, + "top_n_words": [ + { 
+ "word": "the", + "count": 56 + }, + { + "word": "of", + "count": 28 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "and", + "count": 16 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "were", + "count": 15 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "as", + "count": 10 + }, + { + "word": "at", + "count": 10 + }, + { + "word": "enemy", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "In .", + "length": 4 + }, + { + "text": "At .", + "length": 4 + }, + { + "text": "But .", + "length": 5 + }, + { + "text": "Air Force.", + "length": 10 + }, + { + "text": "Machines .", + "length": 10 + }, + { + "text": "Nick Enoch .", + "length": 12 + }, + { + "text": "German soldiers.", + "length": 16 + }, + { + "text": "RFC Squadron 12 in 1916.", + "length": 24 + }, + { + "text": "Manfred von Richtofen, .", + "length": 24 + }, + { + "text": "Above, a British WWI biplane .", + "length": 30 + }, + { + "text": "The flechette for sale is among the .", + "length": 37 + }, + { + "text": "'The auction will take place on July 9.", + "length": 39 + }, + { + "text": "('Arrows') would fill their troops with dread.", + "length": 46 + }, + { + "text": "'Also it was not possible to drop them with sufficient accuracy.", + "length": 64 + }, + { + "text": "bombs being dropped by hand and pistols being used on occasions.", + "length": 64 + }, + { + "text": "aerial warfare was quickly developed by both sides, initially with .", + "length": 68 + }, + { + "text": "observers’ platforms on two-seaters and intense battles commenced.", + "length": 68 + }, + { + "text": "guns were mounted to the front of planes as well as being fitted to .", + "length": 69 + }, + { + "text": "the outbreak of hostilities, warring nations had limited air forces .", + "length": 69 + }, + { + "text": "For the Germans in the trenches of World War One, the cry of 'Pfeile'!", + "length": 70 + }, + { + "text": "which flew slow and unarmed 
reconnaissance planes into enemy territory.", + "length": 71 + }, + { + "text": "the celebrated ace known as the Red Baron because of the colour of his .", + "length": 72 + }, + { + "text": "the Royal Naval Air Service were amalgamated in 1918 to form the Royal .", + "length": 72 + }, + { + "text": "Fokker triplane, led such ‘flying circuses’ and became a hero among .", + "length": 73 + }, + { + "text": "personal belongings of Harry Harse, an engineer stationed in France with .", + "length": 74 + }, + { + "text": "Britain, the Royal Flying Corps, which was the Army’s flying service and .", + "length": 76 + }, + { + "text": "Although deadly, they were much sought-after battlefield mementos among troops.", + "length": 79 + }, + { + "text": "'The flechettes were packed into boxes which had a special release system on it.", + "length": 80 + }, + { + "text": "'The flechettes were packed into boxes which had a special release system on it.", + "length": 80 + }, + { + "text": "Now, one such missile - regarded as Britain’s first bomb - has emerged for sale.", + "length": 82 + }, + { + "text": "' The rudimentary devices were superseded by darts with exploding tips and then full-scale bombs.", + "length": 97 + }, + { + "text": "It meant hundreds of tiny darts were going to drop on them, capable of piercing both helmet and skull.", + "length": 102 + }, + { + "text": "But they weren't very accurate and they certainly weren't very British,' said auctioneer Patrick Bogue.", + "length": 103 + }, + { + "text": "'It was only when the Germans began using ungentlemanly methods of warfare that they became more accepted.", + "length": 106 + }, + { + "text": "The pilot would fly over enemy trenches and pull a string to release them, 500 at a time, over German troops.", + "length": 109 + }, + { + "text": "'Aircraft had only just started to be used in warfare and these darts were effectively the precursor to bombs.", + "length": 110 + }, + { + "text": "The five-inch darts were dropped in 
batches of 500 from biplanes flying over enemy trenches to devastating effect.", + "length": 114 + }, + { + "text": "'About three arrows struck the cow, and went clean through her into the ground, after which the cow died quite suddenly.", + "length": 120 + }, + { + "text": "'It’s incredible to think that in just 30 years it went from dropping rudimentary metal darts to dropping the atom bomb.", + "length": 122 + }, + { + "text": "By this stage of the war, advanced planes such as Bristol Fighter were flying as high as 18,000ft achieving speeds of 123mph.", + "length": 125 + }, + { + "text": "Right up until the end of the war, the various nations’ air forces were evenly matched, suffering similar numbers of losses.", + "length": 126 + }, + { + "text": "In October 1918, the heaviest bomb of the war was dropped by a Handley Page at 1,650lb, an unthinkable weight at the beginning.", + "length": 127 + }, + { + "text": "'The pilot would then fly over enemy trenches and pull a string to release the darts, 500 at a time, over the German troops below.", + "length": 130 + }, + { + "text": "The German air force pioneered the use of ‘flying wings’ – large numbers of aircraft patrolling into enemy territory together.", + "length": 132 + }, + { + "text": "'But they weren’t very accurate and they certainly weren’t very British - in fact the British pilots didn’t like using them at all.", + "length": 137 + }, + { + "text": "The five-inch darts (Pfeile in German) were dropped in batches of 500 from WWI biplanes flying over enemy trenches to devastating effect .", + "length": 138 + }, + { + "text": "British War magazine, The War Illustrated, noted at the time the Royal Flying Corps’ aversion to using the projectiles, called flechettes.", + "length": 140 + }, + { + "text": "Just 31 years after the flechettes were first launched from planes on enemy targets, the Americans dropped two devastating atomic bombs on Japan.", + "length": 145 + }, + { + "text": "They were capable of 
inflicting horrendous wounds - but upstanding British pilots viewed their use as underhand and eventually refused to use them.", + "length": 147 + }, + { + "text": "One edition stated: 'Our aviators think arrow-dropping dirty work - because the enemy cannot hear the things coming, and because they make such nasty wounds.", + "length": 157 + }, + { + "text": "Known as flechettes, the one for sale is among the personal belongings of Harry Harse (above), an engineer stationed in France with RFC Squadron 12 in 1916 .", + "length": 157 + }, + { + "text": "Patrick Bogue, auctioneer, said: 'Harry Harse was a precision engineer who enlisted voluntarily at the outbreak of war and was involved in the manufacture of flechettes.", + "length": 169 + }, + { + "text": "It is expected to fetch £100 when it goes under the hammer at Onslows auction house in Blandford, Dorset, on behalf of relatives of Mr Harse who live in Reigate, Surrey.", + "length": 170 + }, + { + "text": "' Charles Grey, editor of The Aeroplane magazine, wrote in a 1915 edition: 'A friend of mine was at the military aerodrome when some of these arrows were being tested, with an unfortunate cow as the enemy.", + "length": 205 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.539330005645752 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:08.062323739Z", + "first_section_created": "2025-12-23T09:35:08.06259505Z", + "last_section_published": "2025-12-23T09:35:08.062794158Z", + "all_results_received": "2025-12-23T09:35:08.137918647Z", + "output_generated": "2025-12-23T09:35:08.138111355Z", + "total_processing_time_ms": 75, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 75, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:08.06259505Z", + "publish_time": "2025-12-23T09:35:08.062794158Z", + "first_worker_start": "2025-12-23T09:35:08.06332538Z", + "last_worker_end": 
"2025-12-23T09:35:08.13699Z", + "total_journey_time_ms": 74, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:08.063250577Z", + "start_time": "2025-12-23T09:35:08.06332618Z", + "end_time": "2025-12-23T09:35:08.063411684Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:08.063588Z", + "start_time": "2025-12-23T09:35:08.063744Z", + "end_time": "2025-12-23T09:35:08.13699Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 73 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:08.063255677Z", + "start_time": "2025-12-23T09:35:08.06332538Z", + "end_time": "2025-12-23T09:35:08.063624592Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:08.063254877Z", + "start_time": "2025-12-23T09:35:08.06333368Z", + "end_time": "2025-12-23T09:35:08.063371382Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 73, + "min_processing_ms": 73, + "max_processing_ms": 73, + "avg_processing_ms": 73, + "median_processing_ms": 73, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4735, + "slowest_section_id": 0, + "slowest_section_time_ms": 74 + } +} diff --git a/data/output/0054d6d30dbcad772e20b22771153a2a9cbeaf62.json b/data/output/0054d6d30dbcad772e20b22771153a2a9cbeaf62.json new file mode 100644 index 0000000..6cfee9d --- /dev/null +++ b/data/output/0054d6d30dbcad772e20b22771153a2a9cbeaf62.json @@ -0,0 +1,182 @@ +{ + "file_name": "0054d6d30dbcad772e20b22771153a2a9cbeaf62.txt", + "total_words": 119, + "top_n_words": [ + { + "word": "the", + "count": 11 + }, + { + "word": "ship", + "count": 4 + }, + { + "word": "doctors", + "count": 3 + }, + { + "word": "that", + "count": 3 + }, + { + "word": "a", + "count": 2 + }, + { + "word": "aboard", + "count": 2 + }, + { + "word": "according", + "count": 2 + }, + { + "word": "agencia", + "count": 2 + }, + { + "word": "agency", + "count": 2 + }, + { + "word": "america", + "count": 2 + } + ], + "sorted_sentences": [ + { + "text": "The Veendam left New York 36 days ago for a South America tour.", + "length": 63 + }, + { + "text": "Federal Police told Agencia Brasil that forensic doctors were investigating her death.", + "length": 86 + }, + { + "text": "The American tourist died aboard the MS Veendam, owned by cruise operator Holland America.", + "length": 90 + }, + { + "text": "The ship's doctors told police that the woman was elderly and suffered from diabetes and hypertension, according the agency.", + "length": 124 + }, + { + "text": "The other passengers came down with diarrhea prior to her death during an earlier part of the trip, the ship's doctors said.", + "length": 124 + }, + { + "text": "(CNN) -- An American woman died aboard a cruise ship that docked at Rio de Janeiro on Tuesday, the same ship on which 86 passengers previously fell ill, according to 
the state-run Brazilian news agency, Agencia Brasil.", + "length": 218 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.70307856798172 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:08.563588648Z", + "first_section_created": "2025-12-23T09:35:08.565421323Z", + "last_section_published": "2025-12-23T09:35:08.565611831Z", + "all_results_received": "2025-12-23T09:35:08.623078994Z", + "output_generated": "2025-12-23T09:35:08.623181198Z", + "total_processing_time_ms": 59, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 57, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:08.565421323Z", + "publish_time": "2025-12-23T09:35:08.565611831Z", + "first_worker_start": "2025-12-23T09:35:08.566096951Z", + "last_worker_end": "2025-12-23T09:35:08.622176Z", + "total_journey_time_ms": 56, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:08.566129253Z", + "start_time": "2025-12-23T09:35:08.566184355Z", + "end_time": "2025-12-23T09:35:08.566213956Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:08.566397Z", + "start_time": "2025-12-23T09:35:08.566516Z", + "end_time": "2025-12-23T09:35:08.622176Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 55 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:08.566095851Z", + "start_time": "2025-12-23T09:35:08.566143753Z", + "end_time": "2025-12-23T09:35:08.566171554Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:08.566042849Z", + "start_time": "2025-12-23T09:35:08.566096951Z", + "end_time": "2025-12-23T09:35:08.566104652Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + 
"sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 55, + "min_processing_ms": 55, + "max_processing_ms": 55, + "avg_processing_ms": 55, + "median_processing_ms": 55, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 710, + "slowest_section_id": 0, + "slowest_section_time_ms": 56 + } +} diff --git a/data/output/0055114867593ed8c267876b2d4bf0d2f5a97b88.json b/data/output/0055114867593ed8c267876b2d4bf0d2f5a97b88.json new file mode 100644 index 0000000..98ef7d9 --- /dev/null +++ b/data/output/0055114867593ed8c267876b2d4bf0d2f5a97b88.json @@ -0,0 +1,254 @@ +{ + "file_name": "0055114867593ed8c267876b2d4bf0d2f5a97b88.txt", + "total_words": 468, + "top_n_words": [ + { + "word": "the", + "count": 24 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "to", + "count": 14 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "of", + "count": 13 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "bales", + "count": 8 + }, + { + "word": "his", + "count": 8 + }, + { + "word": "s", + "count": 7 + }, + { + "word": "he", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "Army Staff 
Sgt.", + "length": 15 + }, + { + "text": "Afghanistan shootings Fast Facts .", + "length": 34 + }, + { + "text": "\"Yes, Bob will take (the) stand ...", + "length": 35 + }, + { + "text": "CNN's Matt Smith contributed to this report.", + "length": 44 + }, + { + "text": "\"I've asked that question a million times since then.", + "length": 53 + }, + { + "text": "But he has not offered an explanation for his actions.", + "length": 54 + }, + { + "text": "\"My life has never been the same,\" Wazir told the jury.", + "length": 55 + }, + { + "text": "He pleaded not guilty to a charge of obstruction of justice.", + "length": 60 + }, + { + "text": "Yes, Bob will apologize,\" Bales' lawyer, John Henry Browne said in an e-mail to CNN.", + "length": 84 + }, + { + "text": "The plea spares the 39-year-old Bales the prospect of a death sentence in the killings.", + "length": 87 + }, + { + "text": "Bales is a member of the Army's 3rd Stryker Brigade Combat Team, an element of the 2nd Infantry Division.", + "length": 105 + }, + { + "text": "Robert Bales pleaded guilty in June to more than 30 criminal charges, including 16 premeditated murder counts.", + "length": 110 + }, + { + "text": "So far, a number of Afghan civilians have taken the stand for the prosecution to talk about what they saw and survived.", + "length": 119 + }, + { + "text": "Another witness broke down on the stand and cried out: \"For God's sake, don't ask me any more questions,\" Mikkelson wrote.", + "length": 122 + }, + { + "text": "It's been more than a year since the massacre, but Wazir said: \"I feel like it's happening right now,\" the affiliate reported.", + "length": 126 + }, + { + "text": "Mikkelson also tweeted from the sentencing hearing, which began this week at Joint Base Lewis-McChord, near Tacoma, Washington.", + "length": 127 + }, + { + "text": "KING's Mikkelson tweeted that a 12-year-old boy who survived the rampage testified about seeing his father and sister get shot.", + "length": 127 + }, + { + 
"text": "He now faces life in prison, but a jury of four officers and two enlisted personnel will decide whether he will have a chance at parole.", + "length": 136 + }, + { + "text": "Haji Mohammed Wazir lost 11 relatives -- his wife, mother, two brothers, a 13-year-old nephew and six of his seven children -- according to KING.", + "length": 145 + }, + { + "text": "In addition to the murder counts, Bales pleaded guilty to six counts of attempted murder, seven of assault and the use of illicit steroids and alcohol.", + "length": 151 + }, + { + "text": "There's not a good reason in the world for the horrible things I did,\" Bales said when he pleaded guilty, according to Drew Mikkelson of CNN affilliate KING, who was tweeting from the courtroom.", + "length": 194 + }, + { + "text": "His attorneys have said the service made a mistake in assigning Bales to another combat tour despite evidence of post-traumatic stress disorder and a traumatic brain injury suffered during a combat tour in Iraq.", + "length": 211 + }, + { + "text": "(CNN) -- The Army sergeant who admitted to gunning down 16 civilians in a 2012 rampage through two villages near his outpost in southern Afghanistan is expected to take the stand at his sentencing hearing and will apologize.", + "length": 224 + }, + { + "text": "Bales admitted to slipping away from his outpost in southern Afghanistan and going on a house-to-house killing spree in two nearby villages in March 2012, a massacre that further strained ties between American troops and their Afghan allies.", + "length": 241 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7675557732582092 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:09.066358719Z", + "first_section_created": "2025-12-23T09:35:09.067129251Z", + "last_section_published": "2025-12-23T09:35:09.067288957Z", + "all_results_received": "2025-12-23T09:35:09.130715065Z", + "output_generated": "2025-12-23T09:35:09.130879972Z", + 
"total_processing_time_ms": 64, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:09.067129251Z", + "publish_time": "2025-12-23T09:35:09.067288957Z", + "first_worker_start": "2025-12-23T09:35:09.067854781Z", + "last_worker_end": "2025-12-23T09:35:09.129863Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:09.067809379Z", + "start_time": "2025-12-23T09:35:09.067894182Z", + "end_time": "2025-12-23T09:35:09.067966485Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:09.068113Z", + "start_time": "2025-12-23T09:35:09.068241Z", + "end_time": "2025-12-23T09:35:09.129863Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:09.067784678Z", + "start_time": "2025-12-23T09:35:09.067854781Z", + "end_time": "2025-12-23T09:35:09.067931984Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:09.067790078Z", + "start_time": "2025-12-23T09:35:09.067876681Z", + "end_time": "2025-12-23T09:35:09.067900382Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2682, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/005538eba9ddc63dc5d11ee3ef19643f5a2b6af3.json b/data/output/005538eba9ddc63dc5d11ee3ef19643f5a2b6af3.json new file mode 100644 index 0000000..c8bee4d --- /dev/null +++ b/data/output/005538eba9ddc63dc5d11ee3ef19643f5a2b6af3.json @@ -0,0 +1,298 @@ +{ + "file_name": "005538eba9ddc63dc5d11ee3ef19643f5a2b6af3.txt", + "total_words": 551, + "top_n_words": [ + { + "word": "the", + "count": 36 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "to", + "count": 14 + }, + { + "word": "that", + "count": 11 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "said", + "count": 10 + }, + { + "word": "earhart", + "count": 9 + }, + { + "word": "is", + "count": 9 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "s", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "\"But we're skeptical.", + "length": 21 + }, + { + "text": "\"We don't expect anything,\" she said.", + "length": 37 + }, + { + "text": "Earhart: The evidence we almost lost .", + "length": 38 + }, + { + "text": "\" the organization says on its website.", + "length": 39 + }, + { + "text": "And that includes locating a lost airplane.", + "length": 43 + }, + { + "text": "\" Opinion: Will mystery of Earhart be solved?", + "length": 45 + }, + { + "text": "There are people out there who buy those theories.", + 
"length": 50 + }, + { + "text": "Or is this sonar target just a coral rock or ridge?", + "length": 51 + }, + { + "text": "\" Money is needed to invest in expeditions, he said.", + "length": 52 + }, + { + "text": "Photo may be key to finding what happened to aviator .", + "length": 54 + }, + { + "text": "\"We don't want to shrug off the hard work anyone is doing.", + "length": 58 + }, + { + "text": "She said there have been other theories that have emerged.", + "length": 58 + }, + { + "text": "The group said experts have offered various interpretations.", + "length": 60 + }, + { + "text": "And that includes the latest foray into the South Pacific deep.", + "length": 63 + }, + { + "text": "We do like the idea that people are still interested,\" she said.", + "length": 64 + }, + { + "text": "Yet that evidence has been met with skepticism in some quarters.", + "length": 64 + }, + { + "text": "\" Could it really be a piece of Earhart's Lockheed Electra plane?", + "length": 65 + }, + { + "text": "It showed something -- hard for the layman to size up -- on the ocean floor.", + "length": 76 + }, + { + "text": "But in reality, Foudray said, \"no one has yet to come up with anything conclusive.", + "length": 82 + }, + { + "text": "The group said, \"It's the right size, it's the right shape and it's in the right place.", + "length": 87 + }, + { + "text": "One is that Earhart's plane was forced down by the Japanese around the Marshall Islands.", + "length": 88 + }, + { + "text": "Foudray said she's heard all of the evidence and nothing solid has risen to the surface.", + "length": 88 + }, + { + "text": "Some think the sonar image could be a man-made object, and others say it could be a geologic feature.", + "length": 101 + }, + { + "text": "Another is that Earhart secretly returned to the United States and the government gave her a new identity.", + "length": 106 + }, + { + "text": "\"Of course we're not going to know until we can get back out there, but until then the anomaly 
is worth close study.", + "length": 116 + }, + { + "text": "(CNN) -- Could search crews be just a few hundred feet from solving a mystery that has riveted millions for 76 years?", + "length": 117 + }, + { + "text": "The International Group for Historic Aircraft Recovery on its website is asking for contributions to continue its work.", + "length": 119 + }, + { + "text": "\"So did (last summer's) expedition actually succeed in locating the wreckage of the world's most famous missing airplane?", + "length": 121 + }, + { + "text": "The International Group for Historic Aircraft Recovery raised the prospect of a big break in the case by publishing an image online.", + "length": 132 + }, + { + "text": "Louise Foudray, caretaker and historian of the Amelia Earhart Birthplace Museum in Atchison, Kansas, chose her words carefully on Friday afternoon.", + "length": 147 + }, + { + "text": "\" It wasn't until March that one analyst made a possible connection to Earhart in an online forum for the International Group for Historic Aircraft Recovery.", + "length": 157 + }, + { + "text": "He cited technology such as side-scan sonar and magnetometers but said finding a lost plane such as the Earhart craft is \"more a function of funding than technology.", + "length": 165 + }, + { + "text": "\" Richard Fredricks, executive director of the American Salvage Association, a trade group, said that \"almost anything is possible\" these days with advanced technology.", + "length": 168 + }, + { + "text": "Debate about the mystery gained new currency this week after researchers publicized images recorded by search teams scanning the ocean floor nearly a year ago near Nikumaroro Island in the South Pacific.", + "length": 203 + }, + { + "text": "That's the question raised by tantalizing evidence published this week by teams trying to find out what happened to famed aviator Amelia Earhart, who vanished along with navigator Fred Noonan during a doomed attempt to fly around the world in 1937.", + 
"length": 248 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5164703130722046 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:09.568038545Z", + "first_section_created": "2025-12-23T09:35:09.568381259Z", + "last_section_published": "2025-12-23T09:35:09.568542466Z", + "all_results_received": "2025-12-23T09:35:09.628786443Z", + "output_generated": "2025-12-23T09:35:09.62895835Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:09.568381259Z", + "publish_time": "2025-12-23T09:35:09.568542466Z", + "first_worker_start": "2025-12-23T09:35:09.569026586Z", + "last_worker_end": "2025-12-23T09:35:09.626686Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:09.569078188Z", + "start_time": "2025-12-23T09:35:09.569143091Z", + "end_time": "2025-12-23T09:35:09.569218994Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:09.569288Z", + "start_time": "2025-12-23T09:35:09.569441Z", + "end_time": "2025-12-23T09:35:09.626686Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:09.568952283Z", + "start_time": "2025-12-23T09:35:09.569026586Z", + "end_time": "2025-12-23T09:35:09.569097289Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:09.568970984Z", + "start_time": "2025-12-23T09:35:09.569033386Z", + "end_time": "2025-12-23T09:35:09.56912889Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + 
"total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3211, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/0055959834abef2e0275d39571776b6c1800efd3.json b/data/output/0055959834abef2e0275d39571776b6c1800efd3.json new file mode 100644 index 0000000..f76c4f1 --- /dev/null +++ b/data/output/0055959834abef2e0275d39571776b6c1800efd3.json @@ -0,0 +1,540 @@ +{ + "file_name": "0055959834abef2e0275d39571776b6c1800efd3.txt", + "total_words": 1135, + "top_n_words": [ + { + "word": "the", + "count": 60 + }, + { + "word": "to", + "count": 36 + }, + { + "word": "a", + "count": 28 + }, + { + "word": "in", + "count": 27 + }, + { + "word": "skakel", + "count": 25 + }, + { + "word": "of", + "count": 24 + }, + { + "word": "and", + "count": 22 + }, + { + "word": "s", + "count": 22 + }, + { + "word": "was", + "count": 18 + }, + { + "word": "for", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 
4 + }, + { + "text": "Clues?", + "length": 6 + }, + { + "text": "right.", + "length": 6 + }, + { + "text": "Long .", + "length": 6 + }, + { + "text": "Tommy .", + "length": 7 + }, + { + "text": "Books .", + "length": 7 + }, + { + "text": "Santos .", + "length": 8 + }, + { + "text": "She was .", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "more traction.", + "length": 14 + }, + { + "text": "their late mother.", + "length": 18 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "more than a decade ago.", + "length": 23 + }, + { + "text": "But she never got there.", + "length": 24 + }, + { + "text": "Fifty witnesses testified.", + "length": 26 + }, + { + "text": "of the family's prestige .", + "length": 26 + }, + { + "text": "14:56 EST, 6 November 2013 .", + "length": 28 + }, + { + "text": "16:28 EST, 6 November 2013 .", + "length": 28 + }, + { + "text": "Martha Moxley was out causing .", + "length": 31 + }, + { + "text": "He soon surrendered to authorities.", + "length": 35 + }, + { + "text": "Evidence photo from the trial show a .", + "length": 38 + }, + { + "text": "even look for, incriminating evidence.", + "length": 38 + }, + { + "text": "Skakel, the 53-year-old nephew of Robert F.", + "length": 43 + }, + { + "text": "Martha Moxley's home was just 150 yards away .", + "length": 46 + }, + { + "text": "Reportedly among them were employees and residents .", + "length": 52 + }, + { + "text": "confessed to the murder while undergoing rehab there.", + "length": 53 + }, + { + "text": "Bishop said the case was sad for the victim's family.", + "length": 53 + }, + { + "text": "Instead, her body was found beneath a tree in her back yard.", + "length": 60 + }, + { + "text": "Kennedy's widow, Ethel Kennedy, is serving 20 years to life.", + "length": 60 + }, + { + "text": "'This case was not a close call 
for this court,' Bishop said.", + "length": 61 + }, + { + "text": "mischief on Devil's Night, the night before Halloween, in 1976.", + "length": 63 + }, + { + "text": "In the years since the 1975 slaying, some have criticized the .", + "length": 63 + }, + { + "text": "In January 2000, an arrest warrant was issued for Michael Skakel.", + "length": 65 + }, + { + "text": "Gill said she disagrees that an appeal likely won't be successful.", + "length": 66 + }, + { + "text": "were published on the subject, including one by Mark Fuhrman of OJ .", + "length": 68 + }, + { + "text": "noting that Skakel's son was only 4 or 5 when he was sent to prison .", + "length": 69 + }, + { + "text": "papering homes in her elite gated community in Greenwich, Connecticut.", + "length": 70 + }, + { + "text": "Simpson trial infamy, but it wasn't until 1999 that the case got any .", + "length": 70 + }, + { + "text": "ringing doorbells, spraying shaving cream, throwing eggs, and toilet .", + "length": 70 + }, + { + "text": "Prosecutor Susann Gill said the state's case against Skakel was strong.", + "length": 71 + }, + { + "text": "of the Elan School in Poland Springs, Maine, where some claim Michael .", + "length": 71 + }, + { + "text": "police investigators of failing to search adequately for clues because .", + "length": 72 + }, + { + "text": "pushed for immediate bail, slamming the state's case against Skakel and .", + "length": 73 + }, + { + "text": "and Michael were initially suspects in the case after it was discovered .", + "length": 73 + }, + { + "text": "believed to have been bungled from the start, many believed police gave .", + "length": 73 + }, + { + "text": "Martha had been impaled in the neck with a shard from the shattered club.", + "length": 73 + }, + { + "text": "the rich family special treatment in the case and ignored, or failed to .", + "length": 73 + }, + { + "text": "hallway in the Skakel home, left, and a drop of blood in their driveway, .", + "length": 74 + }, + { + 
"text": "The Skakels cooperated with the investigation until 1976, then abruptly stopped.", + "length": 80 + }, + { + "text": "His current attorney, Hubert Santos, had filed a motion seeking a $500,000 bond.", + "length": 80 + }, + { + "text": "She left no later than 11pm, to walk the 150 yards or so to her neighboring house.", + "length": 82 + }, + { + "text": "The last house she happened upon was that of Michael and his brother Tommy Skakel.", + "length": 82 + }, + { + "text": "In removing the stay, Bishop said Sherman's errors were 'substantial and pervasive.", + "length": 83 + }, + { + "text": "Bishop lifted the stay Wednesday but decided he didn't have the authority to grant bail.", + "length": 88 + }, + { + "text": "Sherman, the trial attorney, has said he did all he could to prevent Skakel's conviction.", + "length": 89 + }, + { + "text": "Skakel and Moxley were 15-year-old neighbors in wealthy Greenwich at the time of her death.", + "length": 91 + }, + { + "text": "She had been bludgeoned with a 6 iron golf club so powerfully that the shaft had shattered.", + "length": 91 + }, + { + "text": "Years of continued speculation about Tommy, Michael, and a range of other suspects followed.", + "length": 92 + }, + { + "text": "After a four week trial, Skakel was found guilty of murder and sentenced to 20 years to life.", + "length": 93 + }, + { + "text": "Her pants had been removed, along with her underwear, but there were no signs of sexual assault.", + "length": 96 + }, + { + "text": "But Bishop said he addressed that concern by having Skakel agree to waive his right to a speedy trial.", + "length": 102 + }, + { + "text": "Prosecutors objected to the request for bond and are appealing the ruling granting Skakel a new trial.", + "length": 102 + }, + { + "text": "The Kennedy cousin accused of murder has been forced to remain in jail while waiting for his new trial.", + "length": 103 + }, + { + "text": "That's when a rare Connecticut one-man grand jury was called to 
hear evidence for a potential new trial in the case.", + "length": 116 + }, + { + "text": "He wore handcuffs and leg shackles to court Wednesday and smiled and waved to supporters as he came into the courtroom.", + "length": 119 + }, + { + "text": "Kennedy Jr spoke to the Today show in his cousin's defense and says he is sure he will be found innocent at the retrial .", + "length": 121 + }, + { + "text": "Scene: Another court evidence photo shows the rear of the Skakel home in their elite gated Greenwich, Connecticut community.", + "length": 124 + }, + { + "text": "Prosecutors argued Bishop doesn't have the authority to grant bond because of an automatic stay of his ruling while they appeal.", + "length": 128 + }, + { + "text": "'Whatever inherent authority the court may have it's not unbounded, and I believe it's not applicable to this case,' Bishop said.", + "length": 129 + }, + { + "text": "Skakel's attorneys want him to be freed while awaiting a retrial, saying that keeping him in prison would be a miscarriage of justice.", + "length": 134 + }, + { + "text": "Santos said automatic stays during appeals do not apply to cases like Skakel's and even if they did Bishop has the authority to terminate it.", + "length": 141 + }, + { + "text": "'He's entitled to get out as soon as possible,' Santos said, expressing concern that an appeal of the stay decision could take a month or two.", + "length": 142 + }, + { + "text": "' He said he didn't believe Skakel should be required to remain in prison for months or years while his ruling granting a new trial is appealed.", + "length": 144 + }, + { + "text": "In custody: Michael Skakel remains in jail for the time being but may be released on bail while waiting for his appeal trial for a 1975 murder .", + "length": 144 + }, + { + "text": "A judge has lifted a stay meaning that Michael Skakel may be granted bail at a later date, but that still leaves him behind bars for the time being.", + "length": 148 + }, + { + "text": 
"Weapon: This photo evidence used at trial shows the murder weapon, a 6 iron golf club that belonged to the late mother of Tommy and Michael Skakel .", + "length": 148 + }, + { + "text": "'The state is entitled to avail itself of the appellate process and seek vindication of a result it believes to be unjust,' Gill wrote in legal briefs.", + "length": 151 + }, + { + "text": "Gill argued against terminating the stay, saying it would thwart the administration of justice by requiring the state to retry Skakel before an appeal is finished.", + "length": 163 + }, + { + "text": "Judge Thomas Bishop ruled on October 23 that Skakel's trial attorney, Michael Sherman, failed to adequately represent him in 2002 when he was convicted in Martha Moxley's 1975 golf club bludgeoning.", + "length": 198 + }, + { + "text": "Brutal: Skakel was convicted of murdering his Greenwich, Connecticut neighbor Martha Moxely (pictured) in 1975 by bludgeoning her with his mother's golf club and shoving the shattered shaft into her neck .", + "length": 205 + }, + { + "text": "Skakel 'has been returned to the status of an innocent defendant awaiting trial,' Santos wrote in court papers, adding he was not a flight risk and contends it's 'highly unlikely' prosecutors will win their appeal.", + "length": 214 + }, + { + "text": "Case: Skakel's case for a new trial revolves around his defense attorney at the time Mickey Sherman, who Skakel has now successfully convinced a judge was 'too enamored with the media attention to focus on the defense' Thrilled at decision: Robert F.", + "length": 250 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6307295560836792 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:10.069285754Z", + "first_section_created": "2025-12-23T09:35:10.071000924Z", + "last_section_published": "2025-12-23T09:35:10.071289336Z", + "all_results_received": "2025-12-23T09:35:10.158353316Z", + "output_generated": 
"2025-12-23T09:35:10.158558024Z", + "total_processing_time_ms": 89, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 87, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:10.071000924Z", + "publish_time": "2025-12-23T09:35:10.071212233Z", + "first_worker_start": "2025-12-23T09:35:10.071839459Z", + "last_worker_end": "2025-12-23T09:35:10.146255Z", + "total_journey_time_ms": 75, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:10.071858359Z", + "start_time": "2025-12-23T09:35:10.071905861Z", + "end_time": "2025-12-23T09:35:10.071990265Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:10.072048Z", + "start_time": "2025-12-23T09:35:10.072183Z", + "end_time": "2025-12-23T09:35:10.146255Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 74 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:10.071765456Z", + "start_time": "2025-12-23T09:35:10.071839459Z", + "end_time": "2025-12-23T09:35:10.072022266Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:10.071769656Z", + "start_time": "2025-12-23T09:35:10.071842559Z", + "end_time": "2025-12-23T09:35:10.071890461Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:10.071243334Z", + "publish_time": "2025-12-23T09:35:10.071289336Z", + "first_worker_start": "2025-12-23T09:35:10.071849559Z", + "last_worker_end": "2025-12-23T09:35:10.157393Z", + "total_journey_time_ms": 86, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:10.071858459Z", + "start_time": "2025-12-23T09:35:10.071906861Z", + "end_time": "2025-12-23T09:35:10.071947463Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:10.072143Z", + "start_time": "2025-12-23T09:35:10.072302Z", + "end_time": "2025-12-23T09:35:10.157393Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 85 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:10.071851659Z", + "start_time": "2025-12-23T09:35:10.071917762Z", + "end_time": "2025-12-23T09:35:10.071953663Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:10.071788457Z", + "start_time": "2025-12-23T09:35:10.071849559Z", + "end_time": "2025-12-23T09:35:10.07186826Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 159, + "min_processing_ms": 74, + "max_processing_ms": 85, + "avg_processing_ms": 79, + "median_processing_ms": 85, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3265, + "slowest_section_id": 1, + 
"slowest_section_time_ms": 86 + } +} diff --git a/data/output/0055d0207fa47067895280bb1066720c68872ed7.json b/data/output/0055d0207fa47067895280bb1066720c68872ed7.json new file mode 100644 index 0000000..545cc0b --- /dev/null +++ b/data/output/0055d0207fa47067895280bb1066720c68872ed7.json @@ -0,0 +1,214 @@ +{ + "file_name": "0055d0207fa47067895280bb1066720c68872ed7.txt", + "total_words": 371, + "top_n_words": [ + { + "word": "the", + "count": 34 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "s", + "count": 8 + }, + { + "word": "of", + "count": 7 + }, + { + "word": "in", + "count": 6 + }, + { + "word": "cavendish", + "count": 5 + }, + { + "word": "i", + "count": 5 + }, + { + "word": "race", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "\"I can be proud of how the lads rode.", + "length": 37 + }, + { + "text": "They have got nothing left in the tank.", + "length": 39 + }, + { + "text": "\"There was a group of 22 who got away and we couldn't pull them back,\" said Cavendish.", + "length": 86 + }, + { + "text": "Colombia's Rigoberto Uran took the silver, while bronze went to Alexander Kristoff from Norway.", + "length": 95 + }, + { + "text": "Much fancied home favorite Mark Cavendish finished a disappointing 29th as the British team struggled to meet pre-race expectations.", + "length": 132 + }, + { + "text": "(CNN) -- Alexandre Vinokourov claimed Kazakhstan's first medal at the London Olympics with gold in Saturday's men's cycling road race.", + "length": 134 + }, + { + "text": "\"I will still race in the time trial on Wednesday but I have the gold medal I wanted and after that I will consider retiring,\" said the Kazakh cyclist.", + "length": 151 + }, + { + "text": "Cavendish's hopes appeared to have been boosted when rival Swiss sprinter Fabian Cancellara crashed, but the peloton could not close on the leading group.", + "length": 154 + }, + { + "text": "It was 
quickly clear that the Colombian had no answer to the Kazakh's attack and Vinokourov had a big enough lead to ride across the line with his arms aloft.", + "length": 158 + }, + { + "text": "As the two riders rounded the final corner, the Kazakh made the most of his rival slowing to look back to check for pursuers and launched a sprint to the line.", + "length": 159 + }, + { + "text": "The 38-year-old Vinokourov, who served a two-year ban for blood doping between 2007 and 2009, announced after the race that he may retire from the sport after Wednesday's cycling time trial.", + "length": 190 + }, + { + "text": "With just under 10km to the finish line -- and a handy time advantage of 50 seconds -- the leading pack looked to be preparing for a sprint finish, but Uran made a break for it, and surprisingly Vinokourov was the only rider to respond.", + "length": 236 + }, + { + "text": "World champion Cavendish had been strongly tipped to finally claim the Olympic medal that had eluded him at the 2008 Games in Beijing, where he was the only member of Britain's track cycling team to return from the games without a medal.", + "length": 237 + }, + { + "text": "Despite the British team having control of the peloton throughout the race, they struggled to close the gap on the lead group and Cavendish was nowhere in sight as Vinokourov out sprinted Uran to the finish line in front of Buckingham Palace.", + "length": 242 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.45591112971305847 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:10.572042124Z", + "first_section_created": "2025-12-23T09:35:10.573425481Z", + "last_section_published": "2025-12-23T09:35:10.573621689Z", + "all_results_received": "2025-12-23T09:35:10.638047038Z", + "output_generated": "2025-12-23T09:35:10.638194744Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + 
"section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:10.573425481Z", + "publish_time": "2025-12-23T09:35:10.573621689Z", + "first_worker_start": "2025-12-23T09:35:10.574042506Z", + "last_worker_end": "2025-12-23T09:35:10.637222Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:10.574083108Z", + "start_time": "2025-12-23T09:35:10.57413641Z", + "end_time": "2025-12-23T09:35:10.574178712Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:10.57438Z", + "start_time": "2025-12-23T09:35:10.574518Z", + "end_time": "2025-12-23T09:35:10.637222Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:10.574038806Z", + "start_time": "2025-12-23T09:35:10.574106209Z", + "end_time": "2025-12-23T09:35:10.574167711Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:10.573990504Z", + "start_time": "2025-12-23T09:35:10.574042506Z", + "end_time": "2025-12-23T09:35:10.574066107Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 
0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2063, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/005670c4a85a3122965180585a88682692b8d1b9.json b/data/output/005670c4a85a3122965180585a88682692b8d1b9.json new file mode 100644 index 0000000..5b95150 --- /dev/null +++ b/data/output/005670c4a85a3122965180585a88682692b8d1b9.json @@ -0,0 +1,262 @@ +{ + "file_name": "005670c4a85a3122965180585a88682692b8d1b9.txt", + "total_words": 642, + "top_n_words": [ + { + "word": "the", + "count": 48 + }, + { + "word": "in", + "count": 19 + }, + { + "word": "to", + "count": 19 + }, + { + "word": "lra", + "count": 15 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "human", + "count": 12 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "rights", + "count": 12 + }, + { + "word": "said", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "N.", + "length": 2 + }, + { + "text": "The U.", + "length": 6 + }, + { + "text": "\"It's as if we don't exist.", + "length": 27 + }, + { + "text": "\" CNN could not independently confirm the massacre.", + "length": 51 + }, + { + "text": "\"I beg of you, please talk to others about what has happened to us.", + "length": 67 + }, + { + "text": "\" \"The government says the LRA are no longer a problem, but I know that's not true,\" he said.", + "length": 93 + }, + { + "text": "The LRA also killed those they abducted who walked too slowly or tried to escape, Human Rights Watch said.", + "length": 106 + }, + { + "text": "\"We have been 
forgotten,\" an 80-year-old Congolese man whose son was killed during the massacre told Human Rights Watch.", + "length": 120 + }, + { + "text": "The two commanders who perpetrated the December massacre report to one of those indicted leaders, according to Human Rights Watch.", + "length": 130 + }, + { + "text": "The Lord's Resistance Army (LRA) carried out the brutal campaign in northeastern Congo over four days in December, the report said.", + "length": 131 + }, + { + "text": "After being pushed out of Uganda in 2005, the LRA now operates in the remote border area between southern Sudan, Congo, and Central African Republic.", + "length": 149 + }, + { + "text": "According to those who managed to escape, children captured by the LRA were forced to kill other children who had disobeyed the LRA's rules, the report said.", + "length": 157 + }, + { + "text": "\"The four-day rampage demonstrates that the LRA remains a serious threat to civilians and is not a spent force, as the Ugandan and Congolese governments claim.", + "length": 159 + }, + { + "text": "Human Rights Watch said that the roughly 1,000 United Nations peacekeeping troops in LRA-affected parts of northeastern Congo are insufficient to protect civilians.", + "length": 164 + }, + { + "text": "In 2005, the International Criminal Court issued arrest warrants for senior LRA leaders for crimes they committed in northern Uganda, but those indicted remain at large.", + "length": 169 + }, + { + "text": "Security Council is planning to visit Congo in mid-April to discuss the peacekeeping force's plans for withdrawal and the protection of civilians, Human Rights Watch said.", + "length": 171 + }, + { + "text": "The peacekeeping force is considering removing some troops from the area under pressure from the Congolese government, a move Human Rights Watch warned against on Saturday.", + "length": 172 + }, + { + "text": "LRA forces attacked at least 10 villages from December 14 to 17, killing and abducting hundreds of 
civilians -- including women and children, according to Human Rights Watch.", + "length": 174 + }, + { + "text": "Most of those killed were adult men, but at least 13 women and 23 children were among the dead -- including a 3-year-old girl who was burned to death, according to Human Rights Watch.", + "length": 183 + }, + { + "text": "LRA combatants tied up villagers in the nation's remote Makombo area and hacked them to death with machetes or crushed their skulls with axes and heavy wooden sticks, the report said.", + "length": 183 + }, + { + "text": "In numerous cases, children were ordered to surround the victim in a circle and take turns beating the child on the head with a large wooden stick until the child died, the report said.", + "length": 185 + }, + { + "text": "The Congolese government denies that the LRA is still a serious threat in the country, which may have contributed to the absence of reports about the December massacre, Human Rights Watch said.", + "length": 193 + }, + { + "text": "\"The Makombo massacre is one of the worst ever committed by the LRA in its bloody 23-year history, yet it has gone unreported for months,\" said Anneke Van Woudenberg, a senior Africa researcher at Human Rights Watch.", + "length": 216 + }, + { + "text": "\" The LRA is led by self-declared mystic and prophet Joseph Kony, who claims his insurgency -- which began in 1986 -- is aimed at replacing Uganda's government, led by President Yoweri Museveni, with a democracy based on the Bible's Ten Commandments.", + "length": 250 + }, + { + "text": "(CNN) -- A rebel group in the Democratic Republic of Congo killed at least 321 civilians and abducted 250 others -- including at least 80 children -- in a previously unreported rampage late last year, Human Rights Watch said in a report released Saturday.", + "length": 255 + }, + { + "text": "The Makombo massacre is the deadliest documented attack by the LRA since killing sprees around Christmas 2008 left scores of Congolese dead, but 
dozens of other attacks against civilians have been carried out in other areas in recent months, Human Rights Watch said.", + "length": 266 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8301501870155334 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:11.074258172Z", + "first_section_created": "2025-12-23T09:35:11.074594386Z", + "last_section_published": "2025-12-23T09:35:11.074787494Z", + "all_results_received": "2025-12-23T09:35:11.134065131Z", + "output_generated": "2025-12-23T09:35:11.134207237Z", + "total_processing_time_ms": 59, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:11.074594386Z", + "publish_time": "2025-12-23T09:35:11.074787494Z", + "first_worker_start": "2025-12-23T09:35:11.075314816Z", + "last_worker_end": "2025-12-23T09:35:11.133261Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:11.075248813Z", + "start_time": "2025-12-23T09:35:11.075314816Z", + "end_time": "2025-12-23T09:35:11.075390219Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:11.075559Z", + "start_time": "2025-12-23T09:35:11.075698Z", + "end_time": "2025-12-23T09:35:11.133261Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:11.075309715Z", + "start_time": "2025-12-23T09:35:11.075382418Z", + "end_time": "2025-12-23T09:35:11.075455721Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:11.075355017Z", + "start_time": "2025-12-23T09:35:11.07542652Z", + "end_time": "2025-12-23T09:35:11.075480723Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3800, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/00567158011683186f7a905ee4f3fcf83d661635.json b/data/output/00567158011683186f7a905ee4f3fcf83d661635.json new file mode 100644 index 0000000..46f9591 --- /dev/null +++ b/data/output/00567158011683186f7a905ee4f3fcf83d661635.json @@ -0,0 +1,444 @@ +{ + "file_name": "00567158011683186f7a905ee4f3fcf83d661635.txt", + "total_words": 946, + "top_n_words": [ + { + "word": "the", + "count": 50 + }, + { + "word": "a", + "count": 25 + }, + { + "word": "to", + "count": 24 + }, + { + "word": "in", + "count": 22 + }, + { + "word": "billion", + "count": 21 + }, + { + "word": "samsung", + "count": 21 + }, + { + "word": "s", + "count": 18 + }, + { + "word": "galaxy", + "count": 16 + }, + { + "word": "of", + "count": 16 + }, + { + "word": 
"and", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "9 billion).", + "length": 11 + }, + { + "text": "3 billion).", + "length": 11 + }, + { + "text": "5 billion).", + "length": 11 + }, + { + "text": "8 billion).", + "length": 11 + }, + { + "text": "4billion ($3.", + "length": 13 + }, + { + "text": "2 billion ($1.", + "length": 14 + }, + { + "text": "9 billion ($6.", + "length": 14 + }, + { + "text": "8 billion ($9.", + "length": 14 + }, + { + "text": "4 billion ($3.", + "length": 14 + }, + { + "text": "7-inch display.", + "length": 15 + }, + { + "text": "5-inch iPhone 6 Plus.", + "length": 21 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "semiconductor business.", + "length": 23 + }, + { + "text": "3 billion ($47 billion).", + "length": 24 + }, + { + "text": "8 billion) for July to September this year.", + "length": 43 + }, + { + "text": "Compared to a record-high of £6 billion ($9.", + "length": 45 + }, + { + "text": "'The iPhone 6 will be a significant threat to Samsung.", + "length": 54 + }, + { + "text": "9 billion), according to FactSet, a financial data provider.", + "length": 60 + }, + { + "text": "Quarterly profit from its mobile business, which reached £3.", + "length": 61 + }, + { + "text": "3 billion) a year earlier, is forecast to be a little over £1.", + "length": 64 + }, + { + "text": "The latest reports claim the device, dubbed the Nexus 6, has a 5.", + "length": 65 + }, + { + "text": "That expectation has been steadily lowered to about half this month.", + "length": 68 + }, + { + "text": "5 billion) in 2013, the company is set to report earnings of just £2.", + "length": 70 + }, + { + "text": "This would put the device in direct competition with Apple's phablet 5.", + "length": 71 + }, + { + "text": "That was below the median of analysts' expectations of £3 billion ($4.", + "length": 71 + }, + { + "text": "4 billion ($44 billion), slightly below analysts' expectations of £29.", + "length": 71 + }, + 
{ + "text": "While in China, local brands are making inroads into Samsung's business.", + "length": 72 + }, + { + "text": "It would also put it on par with Samsung's flagship Galaxy Note 4, which has a 5.", + "length": 81 + }, + { + "text": "9-inch screen and will run Android L - the firm's next-generation operating system.", + "length": 83 + }, + { + "text": "In January, analysts estimated Samsung's third quarter operating income would exceed £5.", + "length": 89 + }, + { + "text": "If it happens, this would reflect a 60 per cent plunge from the record-high £6 billion ($9.", + "length": 92 + }, + { + "text": "In today's report, Samsung said the median forecast of July to September operating income was £2.", + "length": 98 + }, + { + "text": "In emerging markets, such as India and China, Samsung's smartphone sales were overtaken by local rivals.", + "length": 104 + }, + { + "text": "Samsung estimated sales for the July to September period declined 20 per cent from a year earlier to £27.", + "length": 106 + }, + { + "text": "The decline in Galaxy sales has hurt demand for Samsung components such as an advanced display called OLED.", + "length": 107 + }, + { + "text": "The construction will begin before the summer next year and begin operations during the second half of 2017.", + "length": 108 + }, + { + "text": "In emerging markets, such as India and China, Samsung's smartphone sales have been overtaken by local rivals .", + "length": 110 + }, + { + "text": "Analysts have repeatedly cut forecasts of Samsung's profit this year as Galaxy sales lagged behind expectations.", + "length": 112 + }, + { + "text": "Samsung (headquarters in Seoul pictured) is set to report its lowest quarterly earnings in more than three years.", + "length": 113 + }, + { + "text": "The South Korean electronics giant became the biggest smartphone brand on the popularity of earlier Galaxy models.", + "length": 114 + }, + { + "text": "But the bigger screen on Apple's new iPhone 6 has been linked 
with luring customers away from the larger Galaxy range.", + "length": 118 + }, + { + "text": "They claim earnings in the quarter ending in September could suffer their biggest decline in Samsung's recent history.", + "length": 118 + }, + { + "text": "But the supply volume for the Edge smartphone will be limited, not giving a big boost to its earnings, analysts said .", + "length": 118 + }, + { + "text": "'The operating margin declined due to increased marketing expenditure and lowered average selling price,' Samsung said.", + "length": 119 + }, + { + "text": "'Rather than seeking stability, Samsung should seek to distinguish [its phones] with Galaxy's design policies,' he said.", + "length": 120 + }, + { + "text": "The company said it 'cautiously expects increased shipments of new smartphones and strong seasonal demand for TV products'.", + "length": 123 + }, + { + "text": "But the supply volume for the Edge smartphone will be limited, likely not giving a big boost to its earnings, analysts added.", + "length": 125 + }, + { + "text": "Other rumoured specifications include a 2MP front-facing camera, and camera on the rear that could range between 8MP and 13MP.", + "length": 126 + }, + { + "text": "It's been almost a year since Google released its Nexus 5, and rumours about the next model have been ramping up in recent weeks.", + "length": 129 + }, + { + "text": "Analysts said the bigger iPhones released last month will likely take away American customers who favored the Galaxy's bigger screens.", + "length": 134 + }, + { + "text": "Analysts claim the bigger iPhones released last month (shown) will likely take away customers who favoured the Galaxy's larger screens.", + "length": 135 + }, + { + "text": "It also began sales of the Galaxy Note 4 in China last month, getting an early start in the world's most populous country before Apple.", + "length": 135 + }, + { + "text": "The latest news comes despite the launch of the Samsung Galaxy S5 (pictured) earlier this 
year, which was received by mostly rave reviews.", + "length": 138 + }, + { + "text": "It comes as the bigger screen on Apple's new iPhone 6 and iPhone 6 Plus has been linked with luring customers away from the previously bigger Galaxy phone .", + "length": 156 + }, + { + "text": "Samsung, the world's biggest smartphone maker, is suffering a financial decline in the face of intense competition from Apple, as well as new Chinese brands.", + "length": 157 + }, + { + "text": "This week, Samsung announced a £9 billion ($15 billion) investment plan to build a new semi-conductor fabrication plant in the South Korean city of Pyeongtaek.", + "length": 160 + }, + { + "text": "' The company moved the launch of the Galaxy Note 4, a large smartphone with a stylus, to late September from October after Apple unveiled the iPhone 6 and 6 Plus.", + "length": 163 + }, + { + "text": "Last month, Samsung also received upbeat initial responses to its Galaxy Note Edge smartphone, a smartphone with a curved side screen that can display weather, news, apps and other information.", + "length": 193 + }, + { + "text": "Last month, Samsung also received upbeat initial responses to its Galaxy Note Edge smartphone (shown), a smartphone with a curved side screen that can display weather, news, apps and other information.", + "length": 201 + }, + { + "text": "In a preview to earnings due out later this month, Samsung said today its quarterly profit was estimated to have fallen to its lowest level in more than three years - dragged down by weak sales of its new Galaxy model.", + "length": 218 + }, + { + "text": "The Galaxy S5 handpicked features from its competitors and put them all into a single handset, with some calling it a 'superphone' The company needs to revamp its handset designs, said Lee Seung-woo, an analyst at IBK Securities.", + "length": 229 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5777366161346436 + }, + "name_replaced": false, + "performance": { + 
"file_detection_time": "2025-12-23T09:35:11.575574883Z", + "first_section_created": "2025-12-23T09:35:11.57597Z", + "last_section_published": "2025-12-23T09:35:11.576364116Z", + "all_results_received": "2025-12-23T09:35:11.655636675Z", + "output_generated": "2025-12-23T09:35:11.655805382Z", + "total_processing_time_ms": 80, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 79, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:11.57597Z", + "publish_time": "2025-12-23T09:35:11.576238811Z", + "first_worker_start": "2025-12-23T09:35:11.576856936Z", + "last_worker_end": "2025-12-23T09:35:11.654766Z", + "total_journey_time_ms": 78, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:11.576896838Z", + "start_time": "2025-12-23T09:35:11.576966341Z", + "end_time": "2025-12-23T09:35:11.577086446Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:11.577118Z", + "start_time": "2025-12-23T09:35:11.577244Z", + "end_time": "2025-12-23T09:35:11.654766Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 77 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:11.576885137Z", + "start_time": "2025-12-23T09:35:11.57695694Z", + "end_time": "2025-12-23T09:35:11.577045244Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:11.576788633Z", + "start_time": "2025-12-23T09:35:11.576856936Z", + "end_time": "2025-12-23T09:35:11.576897238Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:11.576287713Z", + "publish_time": "2025-12-23T09:35:11.576364116Z", + "first_worker_start": "2025-12-23T09:35:11.576842036Z", + "last_worker_end": "2025-12-23T09:35:11.641889Z", + 
"total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:11.576934739Z", + "start_time": "2025-12-23T09:35:11.576966341Z", + "end_time": "2025-12-23T09:35:11.576983641Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:11.577128Z", + "start_time": "2025-12-23T09:35:11.577278Z", + "end_time": "2025-12-23T09:35:11.641889Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:11.576921439Z", + "start_time": "2025-12-23T09:35:11.57695914Z", + "end_time": "2025-12-23T09:35:11.576983141Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:11.576789733Z", + "start_time": "2025-12-23T09:35:11.576842036Z", + "end_time": "2025-12-23T09:35:11.576850936Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 141, + "min_processing_ms": 64, + "max_processing_ms": 77, + "avg_processing_ms": 70, + "median_processing_ms": 77, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2800, + "slowest_section_id": 0, + "slowest_section_time_ms": 78 + } +} diff --git a/data/output/00568b1c8e087756f65db78a0e3b4e9945ed86df.json b/data/output/00568b1c8e087756f65db78a0e3b4e9945ed86df.json new file mode 100644 index 0000000..8e016a2 --- /dev/null +++ b/data/output/00568b1c8e087756f65db78a0e3b4e9945ed86df.json @@ -0,0 +1,516 @@ +{ + "file_name": "00568b1c8e087756f65db78a0e3b4e9945ed86df.txt", + "total_words": 1270, + "top_n_words": [ + { + "word": "the", + "count": 89 + }, + { + "word": "and", + "count": 47 + }, + { + "word": "nokia", + "count": 42 + }, + { + "word": "to", + "count": 29 + }, + { + "word": "x", + "count": 26 + }, + { + "word": "a", + "count": 24 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "s", + "count": 17 + }, + { + "word": "with", + "count": 17 + }, + { + "word": "is", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "1 .", + "length": 3 + }, + { + "text": "com.", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "6bn).", + "length": 5 + }, + { + "text": "4 KitKat.", + "length": 9 + }, + { + "text": "Despite .", + "length": 9 + }, + { + "text": "2bn; £4.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Jellybean.", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "4bn euros ($7.", + "length": 14 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "It has a 3-inch screen, 3.", + "length": 26 + }, + { + "text": "04:19 EST, 24 February 2014 .", + "length": 29 + }, + { + "text": "16:23 EST, 24 February 2014 .", + "length": 29 + }, + { + "text": "1-inch tablet running Windows RT.", + "length": 33 + }, + { + "text": "Victoria Woollaston In 
Barcelona .", + "length": 34 + }, + { + "text": "The X is available now for 89 euros.", + "length": 36 + }, + { + "text": "'The XL sports a large five-inch screen.", + "length": 40 + }, + { + "text": "The most up-to-date version is Android 4.", + "length": 41 + }, + { + "text": "2 GHz dual-core processor with 512MB RAM.", + "length": 41 + }, + { + "text": "It hasn't abandoned Windows Phone completely, though.", + "length": 53 + }, + { + "text": "run an older version of the Google-owner software – Android 4.", + "length": 64 + }, + { + "text": "The UK and US prices for the phones have not yet been announced.", + "length": 64 + }, + { + "text": "2MP camera and comes in red, green, cyan, yellow, white and black.", + "length": 66 + }, + { + "text": "Nokia has announced that it's launching its first Android handsets.", + "length": 67 + }, + { + "text": "The Asha range starts at £42 ($69) with 2MP and 5MP camera models.", + "length": 67 + }, + { + "text": "range proves a great entry to the smartphone ecosystem for new users.", + "length": 69 + }, + { + "text": "Nokia has also licensed its patents and mapping services to Microsoft.", + "length": 70 + }, + { + "text": "Similarly, the Nokia X range doesn't use Google apps or cloud services.", + "length": 71 + }, + { + "text": "Nokia's CEO also announced the Lumia phones will be reduced as a result.", + "length": 72 + }, + { + "text": "being the first Android handset from the Finnish firm, it has chosen to .", + "length": 73 + }, + { + "text": "Microsoft signed a deal to buy Nokia’s phone business in September for 5.", + "length": 75 + }, + { + "text": "The X+ and XL go on sale from April for 99 euros and 109 euros respectively.", + "length": 76 + }, + { + "text": "Elsewhere, the screen has a low 800 x 480 resolution and is powered by the 1.", + "length": 77 + }, + { + "text": "The X series is a significant release for Nokia, despite the basic specifications.", + "length": 82 + }, + { + "text": "Nokia made the announcement 
at the annual Mobile World Congress event in Barcelona.", + "length": 83 + }, + { + "text": "Nokia’s wider offering may have something to do its recent takeover from Microsoft.", + "length": 85 + }, + { + "text": "The Nokia XL will be available in bright green, orange, cyan, yellow, black and white.", + "length": 86 + }, + { + "text": "The original Asha 501 handset was announced in May last year and began shipping in June.", + "length": 88 + }, + { + "text": "Leaked images of the new Nokia handses emerged earlier this month, under the name Nokia X.", + "length": 90 + }, + { + "text": "The Nokia X is an entry-level device with a 4-inch screen, 5MP camera and just 4GB of memory.", + "length": 93 + }, + { + "text": "The X and X+are both entry-level devices with 4-inch screens, while the XL is larger at 5-inches.", + "length": 97 + }, + { + "text": "Alongside the Asha handsets, Nokia also launched its first tablet last year – the Lumia 2520 10.", + "length": 98 + }, + { + "text": "The launch of an Android Nokia phone is the company’s latest foray into new areas of the market.", + "length": 98 + }, + { + "text": "The X looks similar to Nokia’s current basic Asha smartphones and is due to go on sale next month.", + "length": 100 + }, + { + "text": "Pictured is Nokia chief executive Stephen Elop speaking at the Mobile World Conference in Barcelona .", + "length": 101 + }, + { + "text": "The X and X Plus are both entry-level devices with 4-inch screens, while the XL is larger at 5-inches .", + "length": 103 + }, + { + "text": "The Nokia X range also complements the firm’s existing range of budget handsets, namely the Asha range.", + "length": 105 + }, + { + "text": "However, Kerr said that could prove a barrier for those without the know-how, or lacking the inclination.", + "length": 105 + }, + { + "text": "Both the Nokia X and Nokia X+ will be available in bright green, bright red, cyan, yellow, black and white.", + "length": 107 + }, + { + "text": "Nokia chief executive 
Stephen Elop holds up the Nokia X handsets at the Mobile World Conference in Barcelona .", + "length": 110 + }, + { + "text": "The X range has been designed to look similar to Nokia’s current basic Asha smartphones, which run S40 software.", + "length": 114 + }, + { + "text": "It is also synonymous with budget and low-end handsets because its intuitive design appeals to less technical users.", + "length": 116 + }, + { + "text": "Nokia also announced a basic smartphone called the 220, with a 2MP camera, for 29 euros and the Asha 230 for 45 euros.", + "length": 118 + }, + { + "text": "This is likely to be the last Nokia release before Microsoft officially takes over, following an acquisition in September.", + "length": 122 + }, + { + "text": "Nokia will be hoping to attract this end of the market, as well as appeal to emerging markets with the release of X and X+.", + "length": 123 + }, + { + "text": "'Nokia's new smartphone family might run on Android, but it doesn’t give users access to the Google Play store,' he said.", + "length": 123 + }, + { + "text": "The X range (pictured) has been designed to look similar to Nokia's current basic Asha smartphones, which run Windows Phone .", + "length": 125 + }, + { + "text": "Although the operating system is Android based, Nokia has added the tiles and personalisation features seen on Windows Phone .", + "length": 126 + }, + { + "text": "Camera wise, the Nokia X comes with a rear-facing 3MP camera and the XL includes a 2MP front-facing device and 5MP on the back.", + "length": 127 + }, + { + "text": "Nokia has officially launched its first Android handsets – the much-rumoured Nokia X, the X+ and XL - at an event in Barcelona.", + "length": 129 + }, + { + "text": "Windows Phone has been criticised for being difficult to use and runs a fraction of the popular apps seen on the other platforms.", + "length": 129 + }, + { + "text": "'However, it will have all of the big-name apps like Facebook, Skype and Twitter, as well as 
Nokia's own Here maps and Mix Radio.", + "length": 129 + }, + { + "text": "It seems Nokia has chosen Android purely for the apps than the interface, and will use Microsoft's cloud services including OneDrive instead.", + "length": 141 + }, + { + "text": "Asha 500 has a 2MP camera and starts at $69 (£42), the 502 and 503 handsets both have 5MP cameras and costs £55 ($89) and £61 ($99) respectively.", + "length": 148 + }, + { + "text": "All three devices are powered by the Qualcomm Snapdragon dual core processor and support Dual SIM, letting people switch SIM cards to get better deals.", + "length": 151 + }, + { + "text": "Android, on the other hand, holds the majority share of the smartphone market in every region globally and has an app store with more than 700,000 apps.", + "length": 152 + }, + { + "text": "Asha 500 (pictured) has a 2MP camera and starts at $69 (£42), the 502 and 503 handsets both have 5MP cameras and costs $89 (£55) and $99 (£61) respectively.", + "length": 159 + }, + { + "text": "The purchase is set to become official soon and 32,000 Nokia employees will transfer to the Washington-based firm, which recently hired a new CEO Satya Nadella.", + "length": 160 + }, + { + "text": "Teaser invitations to this morning’s press conference hinted towards this name, too, before a series of ‘early built’ images were posted on Chinese site Coolxap.", + "length": 167 + }, + { + "text": "Previous Nokia handsets ran the Windows Phone operating systems, and although the software has slowly been gaining ground on both Android and iOS, it still lags behind.", + "length": 168 + }, + { + "text": "' He added that the lack of easy app access might hold Nokia back but, with a little bit of effort, Android software can be installed on the higher end models via a microSD card.", + "length": 178 + }, + { + "text": "The latest Asha devices, announced in October added improved sharing options, for example, photos can be shared with one click to Facebook, Twitter, over 
Bluetooth and now Whatsapp.", + "length": 181 + }, + { + "text": "It also launched its first 6-inch, phablet device, the Nokia 1320, to compete with some of the other larger models on the market including the HTC One Max and the Galaxy Note range.", + "length": 181 + }, + { + "text": "Rumours about a Nokia handset running Android began towards the end of last year, and these leaked shots appeared to confirm it; with grids and icons similar to those found on existing Android phones.", + "length": 200 + }, + { + "text": "The Nokia X, X+ and XL are a vast departure to what we've seen before from the manufacturer - but the customisable, tiled OS is similar to what we got from Nokia's Lumia range, according to Rob Kerr, mobiles expert at uSwitch.", + "length": 226 + }, + { + "text": "It's nothing new – screens have been getting bigger for a while – but it shows Nokia is keeping up with the Joneses, and phones like the five-inch Samsung Galaxy S4, matching current competition in terms of popularity,' he said.", + "length": 232 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5472387075424194 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:12.077187107Z", + "first_section_created": "2025-12-23T09:35:12.077546022Z", + "last_section_published": "2025-12-23T09:35:12.077891536Z", + "all_results_received": "2025-12-23T09:35:12.177149117Z", + "output_generated": "2025-12-23T09:35:12.177358625Z", + "total_processing_time_ms": 100, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 99, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:12.077546022Z", + "publish_time": "2025-12-23T09:35:12.077762531Z", + "first_worker_start": "2025-12-23T09:35:12.078327054Z", + "last_worker_end": "2025-12-23T09:35:12.159092Z", + "total_journey_time_ms": 81, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": 
"2025-12-23T09:35:12.078364055Z", + "start_time": "2025-12-23T09:35:12.078430958Z", + "end_time": "2025-12-23T09:35:12.078545063Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:12.078603Z", + "start_time": "2025-12-23T09:35:12.078749Z", + "end_time": "2025-12-23T09:35:12.159092Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 80 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:12.078298953Z", + "start_time": "2025-12-23T09:35:12.078362755Z", + "end_time": "2025-12-23T09:35:12.078522062Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:12.078253351Z", + "start_time": "2025-12-23T09:35:12.078327054Z", + "end_time": "2025-12-23T09:35:12.078375356Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:12.077835434Z", + "publish_time": "2025-12-23T09:35:12.077891536Z", + "first_worker_start": "2025-12-23T09:35:12.078347155Z", + "last_worker_end": "2025-12-23T09:35:12.176222Z", + "total_journey_time_ms": 98, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:12.078401557Z", + "start_time": "2025-12-23T09:35:12.078438458Z", + "end_time": "2025-12-23T09:35:12.07847826Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:12.078603Z", + "start_time": "2025-12-23T09:35:12.078758Z", + "end_time": "2025-12-23T09:35:12.176222Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 97 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:12.078378856Z", + "start_time": "2025-12-23T09:35:12.078419158Z", + "end_time": "2025-12-23T09:35:12.07848716Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + 
"wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:12.078306353Z", + "start_time": "2025-12-23T09:35:12.078347155Z", + "end_time": "2025-12-23T09:35:12.078367455Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 177, + "min_processing_ms": 80, + "max_processing_ms": 97, + "avg_processing_ms": 88, + "median_processing_ms": 97, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3577, + "slowest_section_id": 1, + "slowest_section_time_ms": 98 + } +} diff --git a/data/output/0056b1bcbac46553d1ac44c3c2132af7c819ed54.json b/data/output/0056b1bcbac46553d1ac44c3c2132af7c819ed54.json new file mode 100644 index 0000000..91ab9ef --- /dev/null +++ b/data/output/0056b1bcbac46553d1ac44c3c2132af7c819ed54.json @@ -0,0 +1,488 @@ +{ + "file_name": "0056b1bcbac46553d1ac44c3c2132af7c819ed54.txt", + "total_words": 916, + "top_n_words": [ + { + "word": "the", + "count": 49 + }, + { + "word": "a", + "count": 29 + }, + { + "word": "to", + "count": 26 + }, + { + "word": "of", + "count": 21 + }, + 
{ + "word": "celtic", + "count": 20 + }, + { + "word": "he", + "count": 20 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "is", + "count": 18 + }, + { + "word": "larsson", + "count": 18 + }, + { + "word": "be", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "League football.", + "length": 16 + }, + { + "text": "Or Kevin Bridges.", + "length": 17 + }, + { + "text": "Stephen Mcgowan .", + "length": 17 + }, + { + "text": "He would have to be.", + "length": 20 + }, + { + "text": "But the problem is this.", + "length": 24 + }, + { + "text": "They will be pulping them.", + "length": 26 + }, + { + "text": "Is Henrik Larsson up to it?", + "length": 27 + }, + { + "text": "It’s Larsson the manager.", + "length": 27 + }, + { + "text": "The incessant media demands.", + "length": 28 + }, + { + "text": "He wants the job desperately.", + "length": 29 + }, + { + "text": "But that’s not quite right.", + "length": 29 + }, + { + "text": "And he no longer scores goals.", + "length": 30 + }, + { + "text": "Celtic need an adrenalin shot.", + "length": 30 + }, + { + "text": "Someone who will stir the senses.", + "length": 33 + }, + { + "text": "But only after a nine-game trial.", + "length": 33 + }, + { + "text": "It’s how they define themselves.", + "length": 34 + }, + { + "text": "BOIS Landskrona was his first gig.", + "length": 34 + }, + { + "text": "Bluntly, he would put bums on seats.", + "length": 36 + }, + { + "text": "That’s what sustains this club now.", + "length": 37 + }, + { + "text": "Larsson is box office, a marquee name.", + "length": 38 + }, + { + "text": "Far more so than his public image suggests.", + "length": 43 + }, + { + "text": "That’s where the real money is to be found.", + "length": 45 + }, + { + "text": "The inevitable intrusion into his private life.", + "length": 47 + }, + { + "text": "In private Larsson can be a warm and funny man.", + "length": 47 + }, + { + "text": "Someone to rally the 
fans and stimulate interest.", + "length": 49 + }, + { + "text": "This time around Celtic don’t have that luxury.", + "length": 49 + }, + { + "text": "Jackie McNamara and Malky Mackay probably wouldn’t.", + "length": 53 + }, + { + "text": "Little to indicate he might be the new Jose Mourinho.", + "length": 53 + }, + { + "text": "Larsson, then, has no real coaching record to speak of.", + "length": 55 + }, + { + "text": "But there is an unmistakable irony here and it’s this.", + "length": 56 + }, + { + "text": "Like Lennon he has a strong, determined mind of his own.", + "length": 56 + }, + { + "text": "He would never have become the Magnificent Seven of yore.", + "length": 57 + }, + { + "text": "Neil Lennon showed that an inexperienced manager can do it.", + "length": 59 + }, + { + "text": "Time to move on: Neil Lennon left Celtic earlier in the week .", + "length": 62 + }, + { + "text": "He’s not the type to be controlled or told who he is signing.", + "length": 63 + }, + { + "text": "But it’s not Larsson, the iconic player, they would be getting.", + "length": 65 + }, + { + "text": "But the appeal to Dermot Desmond and the Celtic board is obvious.", + "length": 65 + }, + { + "text": "But there are aspects of managing Celtic which would grate hugely.", + "length": 66 + }, + { + "text": "Right now there isn’t a scrap of evidence to suggest that he is.", + "length": 66 + }, + { + "text": "He spent seven years at the club scoring 242 goals in 315 appearances.", + "length": 70 + }, + { + "text": "And whoever they appoint as the next manager has to be able to deliver.", + "length": 71 + }, + { + "text": "Like the Northern Irishman, Larsson knows plenty about life at Parkhead.", + "length": 72 + }, + { + "text": "Henrik Larsson has shown little to suggest he is the man to manage Celtic.", + "length": 74 + }, + { + "text": "People have also described it as an instance of the heart ruling the head.", + "length": 74 + }, + { + "text": "The Celtic board are entitled to go 
for someone who will invigorate the place.", + "length": 78 + }, + { + "text": "If Celtic’s only priority is selling tickets they could appoint Billy Connolly.", + "length": 81 + }, + { + "text": "Goal machine: Larsson scored 242 times for Celtic during his career at the club .", + "length": 81 + }, + { + "text": "What they actually need is the best man to take them back to the Champions League.", + "length": 82 + }, + { + "text": "That Larsson would be a commercial rather than football decision is beyond dispute.", + "length": 83 + }, + { + "text": "But if Coyle is appointed the Scottish champions won’t be selling season tickets.", + "length": 83 + }, + { + "text": "Baptism of fire: Lennon was inexperienced as a manager when he took over at Celtic .", + "length": 84 + }, + { + "text": "It’s possible, of course, that Larsson could be an outstanding success on both fronts.", + "length": 88 + }, + { + "text": "How many of the names on a lengthy list of runners and riders would immediately do that?", + "length": 88 + }, + { + "text": "Currently 11th in a league of 16 teams this is a team battling for survival, not trophies.", + "length": 90 + }, + { + "text": "Nothing which screams he is the man to lead Celtic to the last 16 of the Champions League.", + "length": 90 + }, + { + "text": "Legend: Larsson enjoyed a seven-year career at Celtic and won the league title four times .", + "length": 91 + }, + { + "text": "In with a shout: Henrik Larsson could be the next Celtic boss but lacks coaching experience .", + "length": 93 + }, + { + "text": "He might buy into Celtic’s ‘model’ of buying young players and selling big 100 per cent.", + "length": 94 + }, + { + "text": "Three years at modest Swedish second flight club Landskrona ended tamely with a mutual parting of the ways.", + "length": 107 + }, + { + "text": "Temptation: Celtic majority shareholder Dermot Desmond knows the appeal of hiring Larsson to replace Lennon .", + "length": 109 + }, + { + "text": "Even signing 
autographs – a practice he was prone to avoiding as a player by having his car moved to a sidedoor.", + "length": 114 + }, + { + "text": "He has been fully briefed on why Lennon left and, despite telling his current club he will stay put, wants the job.", + "length": 115 + }, + { + "text": "At Falkenberg, his latest gig, the first ten games of the season have brought three wins, two draws and five defeats.", + "length": 117 + }, + { + "text": "Had Henrik Larsson been a Celtic player now he would have hung around one, maybe two seasons, then he would have been sold.", + "length": 123 + }, + { + "text": "Because what can be more calculating or planned than choosing a manager more suited to selling season tickets than winning trophies?", + "length": 132 + }, + { + "text": "Money was scarce and, for a man who played for three of Europe’s biggest clubs, Celtic offer a far more appealing canvas to paint on.", + "length": 135 + }, + { + "text": "From the perspective of a board of directors wrestling with growing apathy and boredom in the stands, then, Larsson holds obvious appeal.", + "length": 137 + }, + { + "text": "With the current squad of players, Larsson is just as likely to win the Scottish Premiership by 20 points as a Malky Mackay or a Paul Lambert.", + "length": 142 + }, + { + "text": "Owen Coyle is a sound, solid candidate - a lifelong Celtic fan with just one blip on his CV from Bolton Wanderers and a less than sparkling spell at Wigan.", + "length": 155 + }, + { + "text": "Travelling to meet the great man in the Swedish outpost in April 2010 the pitch was brown and bare, the stands the type you’d find at a municipal athletics stadium.", + "length": 166 + }, + { + "text": "It could be argued that Celtic’s Champions League qualification hopes would be better with Larsson in the dug-out and 60,000 in the top deck than they would be with Owen Coyle in there and a crowd of 42,000.", + "length": 209 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 
0.4247047156095505 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:12.578637224Z", + "first_section_created": "2025-12-23T09:35:12.578961537Z", + "last_section_published": "2025-12-23T09:35:12.579216847Z", + "all_results_received": "2025-12-23T09:35:12.658500607Z", + "output_generated": "2025-12-23T09:35:12.658715316Z", + "total_processing_time_ms": 80, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 79, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:12.578961537Z", + "publish_time": "2025-12-23T09:35:12.579165845Z", + "first_worker_start": "2025-12-23T09:35:12.579722768Z", + "last_worker_end": "2025-12-23T09:35:12.65748Z", + "total_journey_time_ms": 78, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:12.579708568Z", + "start_time": "2025-12-23T09:35:12.57976087Z", + "end_time": "2025-12-23T09:35:12.579861174Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:12.579927Z", + "start_time": "2025-12-23T09:35:12.580088Z", + "end_time": "2025-12-23T09:35:12.65748Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 77 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:12.579615864Z", + "start_time": "2025-12-23T09:35:12.579722768Z", + "end_time": "2025-12-23T09:35:12.579819672Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:12.579694567Z", + "start_time": "2025-12-23T09:35:12.579744469Z", + "end_time": "2025-12-23T09:35:12.579960378Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:12.579185546Z", + "publish_time": "2025-12-23T09:35:12.579216847Z", + "first_worker_start": 
"2025-12-23T09:35:12.579700367Z", + "last_worker_end": "2025-12-23T09:35:12.614276Z", + "total_journey_time_ms": 35, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:12.579724468Z", + "start_time": "2025-12-23T09:35:12.57975907Z", + "end_time": "2025-12-23T09:35:12.57976637Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:12.580005Z", + "start_time": "2025-12-23T09:35:12.580136Z", + "end_time": "2025-12-23T09:35:12.614276Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 34 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:12.579820472Z", + "start_time": "2025-12-23T09:35:12.579876175Z", + "end_time": "2025-12-23T09:35:12.579886675Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:12.579658466Z", + "start_time": "2025-12-23T09:35:12.579700367Z", + "end_time": "2025-12-23T09:35:12.579703767Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 111, + "min_processing_ms": 34, + "max_processing_ms": 77, + "avg_processing_ms": 55, + "median_processing_ms": 77, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + 
"worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2555, + "slowest_section_id": 0, + "slowest_section_time_ms": 78 + } +} diff --git a/data/output/0056b7222ef0324ef31cdf2dc605a60744d56d78.json b/data/output/0056b7222ef0324ef31cdf2dc605a60744d56d78.json new file mode 100644 index 0000000..b309210 --- /dev/null +++ b/data/output/0056b7222ef0324ef31cdf2dc605a60744d56d78.json @@ -0,0 +1,234 @@ +{ + "file_name": "0056b7222ef0324ef31cdf2dc605a60744d56d78.txt", + "total_words": 506, + "top_n_words": [ + { + "word": "the", + "count": 27 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "6", + "count": 15 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "she", + "count": 12 + }, + { + "word": "from", + "count": 10 + }, + { + "word": "was", + "count": 9 + }, + { + "word": "a", + "count": 8 + }, + { + "word": "her", + "count": 8 + }, + { + "word": "i", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "\"Yesterday night I was awake really late,\" she said.", + "length": 52 + }, + { + "text": "Since we are number one and two we must be doing something right.", + "length": 65 + }, + { + "text": "\"You know, it was very emotional, because I would have loved to be there, but I'm here.", + "length": 87 + }, + { + "text": "\"Kim had a tough loss yesterday and I had a tough loss today and that's what can happen.", + "length": 88 + }, + { + "text": "\"She played very well today, better than me for sure,\" Wozniacki told a press conference.", + "length": 89 + }, + { + "text": "\"I am young and I get experience every time and you learn more from your losses than from your wins.", + "length": 100 + }, + { + "text": "\"She knew what she wanted to do and I need to get back on the court and practice and come back 
stronger.", + "length": 104 + }, + { + "text": "Afterwards, she told a press conference she was dedicating her victory to her brother's twins, Myla and Teo, who were born overnight.", + "length": 133 + }, + { + "text": "The Dane, who is yet to win a Grand Slam tournament, was beaten 6-1 6-3 in just 73 minutes to extend her miserable run at Roland Garros.", + "length": 136 + }, + { + "text": "(CNN) -- World number one Caroline Wozniacki suffered another morale-sapping defeat at the French Open as she was dumped out by 28th seed Daniela Hantuchova.", + "length": 157 + }, + { + "text": "Her best performance in Paris was reaching the quarterfinals in 2009 and she struggled against Hantuchova, from Slovakia, who was competing in her 41st major.", + "length": 158 + }, + { + "text": "\" After Wozniacki's exit, and Kim Clijsters defeat on Thursday, the trend for upsets continued as Australian 8th seed Samantha Stosur was beaten by unseeded Gisela Dulko.", + "length": 170 + }, + { + "text": "The Argentine set the tone by taking the first four games of the match, and though she dropped the second set, Stosur's 35 unforced errors counted as Dulko wrapped up the match.", + "length": 177 + }, + { + "text": "Serb Jelena Jankovic, the 10th seed, beat Bethanie Mattek-Sands, from the United States, 6-2 6-2, while 11th seed Marion Bartoli, from France, beat Julia Goerges, from Germany, 3-6 6-2 6-4.", + "length": 189 + }, + { + "text": "Russian third seed Vera Zvonareva beat Anastasia Rodionova, from Austria, 6-2 6-3 and Anastasia Pavlyuchenkova, the 14th seed, triumphed over Nuria Llagostera Vives, from Spain, 3-6 6-3 6-3.", + "length": 190 + }, + { + "text": "\" Elsewhere, defending champion and fifth seed, Francesca Schiavone, from Italy, had no such trouble making the fourth round after her Chinese opponent Shuai Peng retired hurt with the score at 6-3 1-2.", + "length": 202 + }, + { + "text": "Hantuchova reeled off nine straight games to seal the opening set and take a 
commanding lead in the second but Woznaicki offered herself hope when she secured her first break to reduce the deficit to 4-2.", + "length": 204 + }, + { + "text": "But Hantuchova held serve twice to seal one of the biggest victories of her career and set up a meeting with 2009 champion Svetlana Kuznetsova in the last 16, after she beat Canadian Rebecca Marino 6-0 6-4.", + "length": 206 + }, + { + "text": "The key to the match was to start well, to be aggressive from the start,\" Stosur added: \"She seemed to be out ahead a little bit better and really kind of be the one dictating the points, which, for me, I'm usually the one able to do that.", + "length": 239 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5301682353019714 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:13.079980736Z", + "first_section_created": "2025-12-23T09:35:13.08031855Z", + "last_section_published": "2025-12-23T09:35:13.080502257Z", + "all_results_received": "2025-12-23T09:35:13.145416026Z", + "output_generated": "2025-12-23T09:35:13.146012351Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:13.08031855Z", + "publish_time": "2025-12-23T09:35:13.080502257Z", + "first_worker_start": "2025-12-23T09:35:13.08104778Z", + "last_worker_end": "2025-12-23T09:35:13.144565Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:13.080986477Z", + "start_time": "2025-12-23T09:35:13.08104778Z", + "end_time": "2025-12-23T09:35:13.081115383Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:13.081259Z", + "start_time": "2025-12-23T09:35:13.081398Z", + "end_time": "2025-12-23T09:35:13.144565Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:13.080997478Z", + "start_time": "2025-12-23T09:35:13.08105908Z", + "end_time": "2025-12-23T09:35:13.081133583Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:13.081012278Z", + "start_time": "2025-12-23T09:35:13.081089982Z", + "end_time": "2025-12-23T09:35:13.081113382Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2762, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/0057105909ba4914e85552334da57410fda8b839.json b/data/output/0057105909ba4914e85552334da57410fda8b839.json new file mode 100644 index 0000000..f5d96a1 --- /dev/null +++ 
b/data/output/0057105909ba4914e85552334da57410fda8b839.json @@ -0,0 +1,406 @@ +{ + "file_name": "0057105909ba4914e85552334da57410fda8b839.txt", + "total_words": 823, + "top_n_words": [ + { + "word": "the", + "count": 46 + }, + { + "word": "a", + "count": 27 + }, + { + "word": "in", + "count": 23 + }, + { + "word": "was", + "count": 22 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "he", + "count": 17 + }, + { + "word": "his", + "count": 17 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "s", + "count": 13 + }, + { + "word": "savyon", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "m.", + "length": 2 + }, + { + "text": "No.", + "length": 3 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "His .", + "length": 5 + }, + { + "text": "to a .", + "length": 6 + }, + { + "text": "recently.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "End it all.", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "'He was non-violent.", + "length": 20 + }, + { + "text": "Not in the slightest.", + "length": 21 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "11:02 EST, 12 August 2013 .", + "length": 27 + }, + { + "text": "10:01 EST, 12 August 2013 .", + "length": 27 + }, + { + "text": "'Did I think he was suicidal?", + "length": 29 + }, + { + "text": "Did I think he was dangerous?", + "length": 29 + }, + { + "text": "‘He was a gracious opponent.", + "length": 30 + }, + { + "text": "The counselor escaped unharmed.", + "length": 31 + }, + { + "text": "' Perhaps most tragic is that the .", + "length": 35 + }, + { + "text": "Ap Reporter and Daily Mail Reporter .", + "length": 37 + }, + { + "text": "He would try to help anyone he could.", + "length": 37 + }, + { + "text": "Apparently this is what he was thinking.", + "length": 40 + }, + { + "text": "expected he was capable of killing his own son.", + 
"length": 47 + }, + { + "text": "to hire security officers - something they've done in the past.", + "length": 63 + }, + { + "text": "'They've had security in the past and were able to hire police .", + "length": 64 + }, + { + "text": "Police and emergency services responded at approximately 10:30 a.", + "length": 65 + }, + { + "text": "when he returned from Israel to bury his brother but that no one .", + "length": 66 + }, + { + "text": "report of shots fired inside the YWCA in Manchester, New Hampshire .", + "length": 68 + }, + { + "text": "rabbi, Levi Krinsky of Chabad Lubavitch, said the man was depressed .", + "length": 69 + }, + { + "text": "Vig divorced Savyon in 1997, but remained friends with her ex-husband.", + "length": 70 + }, + { + "text": "officers, but because of budget cuts they haven't been able to do that .", + "length": 72 + }, + { + "text": "incident could have been prevented if the organization had enough money .", + "length": 73 + }, + { + "text": "I am utterly stunned at the news concerning this morning's horrific events.", + "length": 75 + }, + { + "text": "'She had reservations which is why he was having supervised visits,' Strelzin said.", + "length": 83 + }, + { + "text": "The counselor who was present at the time managed to escape unarmed, officials said.", + "length": 84 + }, + { + "text": "He was a really nice guy,' she said from her home in North Billerica, Massachusetts.", + "length": 84 + }, + { + "text": "Intent: Savyon had threatened to kill himself, his son and the boy's mother before .", + "length": 84 + }, + { + "text": "The organization is open on Sundays for supervised child visitation and custody exchanges.", + "length": 90 + }, + { + "text": "Families: On Sundays the YWCA is open for supervised child visitation and custody exchanges .", + "length": 93 + }, + { + "text": "Response: Police arrived to the scene quickly and in force after 10am Sunday, forming a perimeter .", + "length": 99 + }, + { + "text": "He had a troubled 
relationship with the boy's mother and threatened to kill them and himself before.", + "length": 100 + }, + { + "text": "Muni, a software engineer, sent an email to a friend suggesting he was suicidal before the shootings.", + "length": 101 + }, + { + "text": "Others were inside the YWCA building when the shots were fired partway through the one-hour visitation.", + "length": 103 + }, + { + "text": "She described him as a kind-hearted person who loved his son and tried to give the boy varied experiences.", + "length": 106 + }, + { + "text": "Current New Hampshre State Representative Peter Sullivan said Savyon was a political hopeful in years past .", + "length": 108 + }, + { + "text": "Savyon  also recently lost his younger brother who suffered a heart attack in their home country of Israel.", + "length": 108 + }, + { + "text": "Shooter: Not knowing whether the shooter was alive or dead, police raided the building in a strategic manner .", + "length": 110 + }, + { + "text": "'There wasn't any security in place other than the fact that there was a counselor in the room,' Strelzin said.", + "length": 111 + }, + { + "text": "The boy's mother had dropped Joshua off at the YWCA around 9:30am and was not in the building when the shooting happened.", + "length": 121 + }, + { + "text": "'He was just very broken, which I thought was directly related to his brother's passing,' Rabbi Krinsky told the Boston Globe.", + "length": 126 + }, + { + "text": "The man who killed his 9-year-old son during a supervised visit Sunday apparently did so to spite the boy's mother, authorities said.", + "length": 133 + }, + { + "text": "‘Muni Savyon was my opponent in the special election a few years back,’ Sullivan, who easily beat Savyon, wrote on his Facebook Sunday.", + "length": 139 + }, + { + "text": "Dead: Muni Savyon shot and killed his 9-year-old son Joshua Sunday during a supervised parental visit at a YWCA in Manchester, New Hampshire .", + "length": 142 + }, + { + "text": "Motive: 
Savyon's rabbi, Levi Krinsky, said the man had become depressed after his younger brother died in Israel recently from a heart attack .", + "length": 143 + }, + { + "text": "The YWCA issued a statement late Sunday saying its 'thoughts and prayers go out to the families and friends of all those involved in this incident.", + "length": 147 + }, + { + "text": "The couple's marital status wasn't clear: Savyon's rabbi said the couple were divorced while law enforcement officials said the two were separated.", + "length": 147 + }, + { + "text": "Savyon was active in Libertarian causes, Vig said, and was a twice-defeated candidate for the New Hampshire Legislature, most recently in February.", + "length": 147 + }, + { + "text": "’ Born in Israel, Savyon was a naturalized citizen who lived in several Western states before coming to New England, where he worked as a software engineer, Vig said.", + "length": 168 + }, + { + "text": "' Savyon's ex-wife Ellen Vig - not the boy's mother - got a copy of the suicide email written in Hebrew which she says spells out his intentions and what he wants done with his property.", + "length": 186 + }, + { + "text": "54-year-old Muni Savyon was meeting with his son Joshua and a counselor at the YWCA in Manchester, New Hampshire when he brought out a hand gun and shot his son several times before turning the weapon on himself.", + "length": 212 + }, + { + "text": "'There's a lot of sadness and also anger on the part of everyone involved that a father would take his son's life for no other reason than apparently to spite his mother,' New Hampshire Assistant Attorney General Jeffery Strelzin told the Boston Herald.", + "length": 253 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5788214206695557 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:13.581271046Z", + "first_section_created": "2025-12-23T09:35:13.581582959Z", + "last_section_published": "2025-12-23T09:35:13.581830969Z", 
+ "all_results_received": "2025-12-23T09:35:13.649152637Z", + "output_generated": "2025-12-23T09:35:13.649349645Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:13.581582959Z", + "publish_time": "2025-12-23T09:35:13.581830969Z", + "first_worker_start": "2025-12-23T09:35:13.582358791Z", + "last_worker_end": "2025-12-23T09:35:13.648213Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:13.582365491Z", + "start_time": "2025-12-23T09:35:13.582425394Z", + "end_time": "2025-12-23T09:35:13.582520697Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:13.58255Z", + "start_time": "2025-12-23T09:35:13.582689Z", + "end_time": "2025-12-23T09:35:13.648213Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:13.582272487Z", + "start_time": "2025-12-23T09:35:13.582359191Z", + "end_time": "2025-12-23T09:35:13.582471395Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:13.582291088Z", + "start_time": "2025-12-23T09:35:13.582358791Z", + "end_time": "2025-12-23T09:35:13.582391292Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, 
+ "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4766, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/0057863e126ceb0f22053aa1570a14977e5803ff.json b/data/output/0057863e126ceb0f22053aa1570a14977e5803ff.json new file mode 100644 index 0000000..a279052 --- /dev/null +++ b/data/output/0057863e126ceb0f22053aa1570a14977e5803ff.json @@ -0,0 +1,274 @@ +{ + "file_name": "0057863e126ceb0f22053aa1570a14977e5803ff.txt", + "total_words": 546, + "top_n_words": [ + { + "word": "the", + "count": 37 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "in", + "count": 12 + }, + { + "word": "gillard", + "count": 10 + }, + { + "word": "he", + "count": 10 + }, + { + "word": "for", + "count": 8 + }, + { + "word": "minister", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "m.", + "length": 2 + }, + { + "text": "m.", + "length": 2 + }, + { + "text": "local time (1:30 a.", + "length": 19 + }, + { + "text": "Gillard called the vote for 4:30 p.", + "length": 35 + }, + { + "text": "At the time he said he wouldn't try again.", + "length": 42 + }, + { + "text": "At the time, experts warned the tactic could backfire.", + "length": 54 + }, + { + "text": "\"I'm not 
prepared to dishonor my word,\" he told reporters.", + "length": 58 + }, + { + "text": "Soon after, she assigned him to the post of foreign minister.", + "length": 61 + }, + { + "text": "ET) after being pushed by long-time Labor minister Simon Crean.", + "length": 63 + }, + { + "text": "However the vote produced the first hung parliament in Australia since 1940.", + "length": 76 + }, + { + "text": "Gillard reinforced her claim to power by winning a general election in August 2010.", + "length": 83 + }, + { + "text": "\"It puts beyond doubt the question of leadership in the Australian Labor Party,\" he added.", + "length": 90 + }, + { + "text": "\"There was no vote because there were no opposing candidates,\" said ALP returning officer Chris Hayes.", + "length": 102 + }, + { + "text": "He was referring to comments he made last February after mounting an unsuccessful bid to depose Gillard.", + "length": 104 + }, + { + "text": "Before the vote, Crean said he wouldn't be nominating himself as leader, but would take the job of deputy.", + "length": 106 + }, + { + "text": "Crean called a press conference and dramatically challenged Rudd to stand up and bring an end to bitter infighting.", + "length": 115 + }, + { + "text": "Until Thursday, he was Minister for Regional Australia, Regional Development and Local Government, and Minister for the Arts.", + "length": 125 + }, + { + "text": "An uneasy tension has existed between Rudd and Gillard since his former deputy staged a successful bid to replace him in June 2010.", + "length": 131 + }, + { + "text": "The Welsh-born politician secured enough support from the Australian Greens Party, and independents, to form a minority government.", + "length": 131 + }, + { + "text": "In January, Gillard surprised the country by calling an election for September 14, the longest lead time for an Australian election in history.", + "length": 143 + }, + { + "text": "\" She said that leadership uncertainty that had been blighting the party 
in recent months was settled \"in the most conclusive fashion possible.", + "length": 143 + }, + { + "text": "In a short statement after the vote, Gillard said she accepted the support of her colleagues \"with a sense of deep humility and a sense of resolve.", + "length": 147 + }, + { + "text": "Minutes before the meeting of Australian Labor Party (ALP) caucus members, former leader Kevin Rudd made it clear that he would not be pitting himself against his former rival.", + "length": 176 + }, + { + "text": "Rudd lost that poll 31 to Gillard's 71, but the decisive vote failed to end speculation about a leadership challenge amid a poor performance by the prime minister in public polls.", + "length": 179 + }, + { + "text": "\" One hundred Labor caucus members had been due to vote, but in the end, no votes were cast because there were no candidates beyond Gillard for prime minister, and Wayne Swan as her deputy.", + "length": 189 + }, + { + "text": "(CNN) -- Australian Prime Minister Julia Gillard has survived another attempt to oust her from the job after no challengers emerged to vie for the leadership of the governing party and the country.", + "length": 197 + }, + { + "text": "'' The parties clashed over a number of issues, not least a controversial mining tax and a move by the government to reject World Heritage Listing for the Tarkine wilderness in north west Tasmania.", + "length": 197 + }, + { + "text": "\"I don't want any more games, I'm sick to death of it, it's about time he stood up and instead of having his camp leak things, actually have the courage of his conviction and his beliefs,\" Crean said.", + "length": 200 + }, + { + "text": "However last month, the Greens pulled their support, with leader Christine Milne accusing the Gillard government of ''walking away from its agreement with the Greens and into the arms of the big miners.", + "length": 202 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4473140835762024 + }, + "name_replaced": 
false, + "performance": { + "file_detection_time": "2025-12-23T09:35:14.083032176Z", + "first_section_created": "2025-12-23T09:35:14.08339069Z", + "last_section_published": "2025-12-23T09:35:14.083580098Z", + "all_results_received": "2025-12-23T09:35:14.142593024Z", + "output_generated": "2025-12-23T09:35:14.142784732Z", + "total_processing_time_ms": 59, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:14.08339069Z", + "publish_time": "2025-12-23T09:35:14.083580098Z", + "first_worker_start": "2025-12-23T09:35:14.084084319Z", + "last_worker_end": "2025-12-23T09:35:14.141605Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:14.084080319Z", + "start_time": "2025-12-23T09:35:14.084159522Z", + "end_time": "2025-12-23T09:35:14.084223125Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:14.084279Z", + "start_time": "2025-12-23T09:35:14.084417Z", + "end_time": "2025-12-23T09:35:14.141605Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:14.084066618Z", + "start_time": "2025-12-23T09:35:14.084131621Z", + "end_time": "2025-12-23T09:35:14.084195223Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:14.084015616Z", + "start_time": "2025-12-23T09:35:14.084084319Z", + "end_time": "2025-12-23T09:35:14.08411762Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3195, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/00579a91246db0df52a7106cc6650c56c9fbc604.json b/data/output/00579a91246db0df52a7106cc6650c56c9fbc604.json new file mode 100644 index 0000000..b7c7260 --- /dev/null +++ b/data/output/00579a91246db0df52a7106cc6650c56c9fbc604.json @@ -0,0 +1,270 @@ +{ + "file_name": "00579a91246db0df52a7106cc6650c56c9fbc604.txt", + "total_words": 500, + "top_n_words": [ + { + "word": "in", + "count": 23 + }, + { + "word": "the", + "count": 18 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "was", + "count": 17 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "jenkins", + "count": 12 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "fiore", + "count": 8 + }, + { + "word": "said", + "count": 7 + }, + { + "word": "found", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "Watch how suspect found in hotel » .", + "length": 37 + }, + { + "text": "The body was identified Monday as Fiore.", + "length": 40 + }, + { + "text": "Watch what led police to hotel 
room » .", + "length": 40 + }, + { + "text": "He was believed to be armed and dangerous.", + "length": 42 + }, + { + "text": "Police have not been able to identify the woman.", + "length": 48 + }, + { + "text": "\" CNN's Paul Vercammen contributed to this report.", + "length": 50 + }, + { + "text": "\"Clearly, the process did not work properly in this case.", + "length": 57 + }, + { + "text": "CNN has not confirmed reports that the marriage was annulled.", + "length": 61 + }, + { + "text": "Staff at a motel in Hope, British Columbia, found Jenkins dead, officials said.", + "length": 79 + }, + { + "text": "Police were hunting for Ryan Alexander Jenkins after the death of Jasmine Fiore.", + "length": 80 + }, + { + "text": "Jenkins, who appeared on the VH1 show \"Megan Wants a Millionaire,\" is from Calgary.", + "length": 83 + }, + { + "text": "And in 2007, Jenkins pleaded guilty in Calgary, Alberta, Canada to assault in a separate case.", + "length": 94 + }, + { + "text": "While the cause of death had not been confirmed, a preliminary coroner's report indicated she was strangled.", + "length": 108 + }, + { + "text": "51 Minds is investigating what went wrong and taking steps to ensure that this sort of lapse never occurs again.", + "length": 112 + }, + { + "text": "Law enforcement sources have told CNN that Fiore was identified through the serial numbers on her breast implants.", + "length": 114 + }, + { + "text": "Jenkins reported Fiore missing last Saturday night to the Los Angeles County Sheriff's Department, authorities said.", + "length": 116 + }, + { + "text": "Her teeth had been extracted and fingers removed in what police said was an apparent attempt to conceal her identity.", + "length": 117 + }, + { + "text": "The nude body of Jenkins' wife, former swimsuit model Jasmine Fiore, was found last weekend in Orange County, California.", + "length": 121 + }, + { + "text": "Fiore's body was found last Saturday in a Dumpster behind an apartment complex in Buena 
Park, just outside Anaheim, California.", + "length": 127 + }, + { + "text": "Fiore lived in Los Angeles and was last seen alive in San Diego at a poker game with Jenkins, the night before the body was found.", + "length": 130 + }, + { + "text": "Earlier Sunday, Canadian authorities said they had credible information that Jenkins was in Canada and called on him to turn himself in.", + "length": 136 + }, + { + "text": "Walker said a woman, about 20 to 25 years old, dropped off Jenkins at the motel on Friday in a silver Chrysler PT Cruiser with Alberta tags.", + "length": 140 + }, + { + "text": "\"It was a man hanging by a belt from a coat rack,\" Kevin Walker, the manager of the budget Thunderbird Motel, told CNN affiliate CTV on Sunday.", + "length": 143 + }, + { + "text": "According to court records in Las Vegas, Nevada, Jenkins was charged in June with battery for allegedly striking Fiore in the arm with his fist.", + "length": 144 + }, + { + "text": "51Minds, which produced \"Megan Wants a Millionaire,\" said Thursday in a written statement that it \"was not aware of Ryan Jenkins' record when it cast him.", + "length": 154 + }, + { + "text": "He was sentenced to 15 months probation, ordered to undergo counseling for domestic violence and sex addiction and to stay away from the person involved, according to court records.", + "length": 181 + }, + { + "text": "\"The company did have in place what it thought was a thorough vetting process that involved complete background checks by an outside company for all contestants on its shows,\" it said.", + "length": 184 + }, + { + "text": "(CNN) -- Ryan Alexander Jenkins, a reality TV contestant suspected in his wife's slaying, was found hanging from a coat rack in a motel room in an apparent suicide, according to Canadian officials.", + "length": 197 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7048727869987488 + }, + "name_replaced": false, + "performance": { + "file_detection_time": 
"2025-12-23T09:35:14.584350887Z", + "first_section_created": "2025-12-23T09:35:14.584596197Z", + "last_section_published": "2025-12-23T09:35:14.584749703Z", + "all_results_received": "2025-12-23T09:35:14.647928201Z", + "output_generated": "2025-12-23T09:35:14.648105408Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:14.584596197Z", + "publish_time": "2025-12-23T09:35:14.584749703Z", + "first_worker_start": "2025-12-23T09:35:14.585286425Z", + "last_worker_end": "2025-12-23T09:35:14.646947Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:14.585244424Z", + "start_time": "2025-12-23T09:35:14.585311426Z", + "end_time": "2025-12-23T09:35:14.585359328Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:14.585468Z", + "start_time": "2025-12-23T09:35:14.585611Z", + "end_time": "2025-12-23T09:35:14.646947Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:14.585233023Z", + "start_time": "2025-12-23T09:35:14.585292326Z", + "end_time": "2025-12-23T09:35:14.585351128Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:14.585221123Z", + "start_time": "2025-12-23T09:35:14.585286425Z", + "end_time": "2025-12-23T09:35:14.585313026Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 
0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2963, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/0057af84b9b2ab099281d0f787005e64c6580e40.json b/data/output/0057af84b9b2ab099281d0f787005e64c6580e40.json new file mode 100644 index 0000000..ff5d648 --- /dev/null +++ b/data/output/0057af84b9b2ab099281d0f787005e64c6580e40.json @@ -0,0 +1,278 @@ +{ + "file_name": "0057af84b9b2ab099281d0f787005e64c6580e40.txt", + "total_words": 575, + "top_n_words": [ + { + "word": "the", + "count": 36 + }, + { + "word": "of", + "count": 30 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "and", + "count": 14 + }, + { + "word": "their", + "count": 14 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "are", + "count": 9 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "over", + "count": 8 + }, + { + "word": "that", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Becky Barrow .", + "length": 14 + }, + { + "text": "03:53 EST, 
23 October 2012 .", + "length": 28 + }, + { + "text": "19:25 EST, 22 October 2012 .", + "length": 28 + }, + { + "text": "Rural investment firm NFU Mutual estimates that 1.", + "length": 50 + }, + { + "text": "‘At the root of this problem is a mismatch between expectation and reality.", + "length": 77 + }, + { + "text": "More people need to make financial provisions for their retirement to bridge this gap.", + "length": 86 + }, + { + "text": "It found 41 per cent will retire on less than two-thirds of their salary on retirement.", + "length": 87 + }, + { + "text": "A Department for Work and Pensions spokesman said: ‘The good news is more of us are living longer.", + "length": 100 + }, + { + "text": "’ A separate report out today has found that many older people expect they ‘will never retire’.", + "length": 101 + }, + { + "text": "4million people  aged over 64 are still working – and that four in ten said they will work until they drop.", + "length": 110 + }, + { + "text": "’ The IFS said millions of people aged between 50 and State pension age have a poor, or non-existent, pension.", + "length": 112 + }, + { + "text": "The richest families lost a total of £162,465 – £86,217 of their investments and £75,585 from the value of their home.", + "length": 123 + }, + { + "text": "Innocent: Dr Ros Altmann said that the over 50s were victims of financial problems caused by irresponsible borrowing and lending .", + "length": 130 + }, + { + "text": "The Institute for Fiscal Studies reckons the richest 20 per cent of the over-50s saw an average of £162,500 wiped off their assets.", + "length": 132 + }, + { + "text": "Many are faced with the stark choice of selling their home to release some money to live on, or continuing to work to make ends meet.", + "length": 133 + }, + { + "text": "Britain's over-50s lost an average of £60,000 during the worst of the credit crunch, a study from a leading think-tank reveals today.", + "length": 134 + }, + { + "text": "It is only when other 
sources of income, such as inheritances and other savings, are taken into account that the number drops to just 10 per cent.", + "length": 146 + }, + { + "text": "Steven Meredith, a pension specialist at NFU Mutual, said: ‘For growing numbers of people, retirement  might not be one option they can afford to take.", + "length": 154 + }, + { + "text": "The IFS report says: ‘The impact on this age group may be of particular concern because they are unlikely to be able to make up the losses  later in life.", + "length": 157 + }, + { + "text": "The loss is a combination of the fall in the value of their home and the drop in the value of their investments, such as tax-free Isas and  their shareholdings.", + "length": 161 + }, + { + "text": "Dr Ros Altmann, of old age specialists Saga, said: ‘The over-50s are innocent victims of the financial problems caused by irresponsible borrowing and lending by others.", + "length": 170 + }, + { + "text": "Impact: The richest 20 per cent of over 50s have seen an average £162,000 wiped off their assets thanks to economic downturn, while the average lost was nearly £60,000 .", + "length": 171 + }, + { + "text": "These households lost an average of £59,992, equal to 10 per cent of their assets, with £26,666 knocked off  their investments and £33,359 off  the value of their home.", + "length": 173 + }, + { + "text": "’ The IFS looked at the impact on personal finances of households in England that contained at least one over-50 between May 2007 and March 2009, the height of the credit crunch.", + "length": 180 + }, + { + "text": "‘Middle-class pension savers are finding that the income they were expecting will not materialise, due to falls in the value of their investments or rises in the costs of buying annuities.", + "length": 190 + }, + { + "text": "‘This means people are going to have to work beyond the traditional retirement age if they are to afford a good quality of life in retirement, and that’s why the state pension age will 
rise over the coming years for men and for women.", + "length": 238 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4666827321052551 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:15.085497776Z", + "first_section_created": "2025-12-23T09:35:15.087446755Z", + "last_section_published": "2025-12-23T09:35:15.087609462Z", + "all_results_received": "2025-12-23T09:35:15.158632833Z", + "output_generated": "2025-12-23T09:35:15.15881104Z", + "total_processing_time_ms": 73, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 71, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:15.087446755Z", + "publish_time": "2025-12-23T09:35:15.087609462Z", + "first_worker_start": "2025-12-23T09:35:15.088171884Z", + "last_worker_end": "2025-12-23T09:35:15.157692Z", + "total_journey_time_ms": 70, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:15.088154984Z", + "start_time": "2025-12-23T09:35:15.088234387Z", + "end_time": "2025-12-23T09:35:15.088332991Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:15.088321Z", + "start_time": "2025-12-23T09:35:15.088469Z", + "end_time": "2025-12-23T09:35:15.157692Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 69 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:15.088096881Z", + "start_time": "2025-12-23T09:35:15.088171984Z", + "end_time": "2025-12-23T09:35:15.088281689Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:15.088102382Z", + "start_time": "2025-12-23T09:35:15.088171884Z", + "end_time": "2025-12-23T09:35:15.088210686Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + 
"sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 69, + "min_processing_ms": 69, + "max_processing_ms": 69, + "avg_processing_ms": 69, + "median_processing_ms": 69, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3242, + "slowest_section_id": 0, + "slowest_section_time_ms": 70 + } +} diff --git a/data/output/0057e887ec89de912dd8141a60b746194f95ea62.json b/data/output/0057e887ec89de912dd8141a60b746194f95ea62.json new file mode 100644 index 0000000..47ed13d --- /dev/null +++ b/data/output/0057e887ec89de912dd8141a60b746194f95ea62.json @@ -0,0 +1,452 @@ +{ + "file_name": "0057e887ec89de912dd8141a60b746194f95ea62.txt", + "total_words": 1091, + "top_n_words": [ + { + "word": "the", + "count": 59 + }, + { + "word": "of", + "count": 27 + }, + { + "word": "hong", + "count": 26 + }, + { + "word": "kong", + "count": 26 + }, + { + "word": "to", + "count": 26 + }, + { + "word": "in", + "count": 22 + }, + { + "word": "a", + "count": 21 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "for", + "count": 20 + }, + { + "word": "film", + "count": 15 + } + ], + "sorted_sentences": [ + { + "text": 
"industry.", + "length": 9 + }, + { + "text": "3 million.", + "length": 10 + }, + { + "text": "I disagree.", + "length": 11 + }, + { + "text": "\" E-mail to a friend .", + "length": 22 + }, + { + "text": "but not many Chinese movies.", + "length": 28 + }, + { + "text": "Why work for a sunset industry?", + "length": 31 + }, + { + "text": "China says: 'You are not Chinese.", + "length": 33 + }, + { + "text": "In Hong Kong we all speak Cantonese.", + "length": 36 + }, + { + "text": "\" The box office numbers are sobering.", + "length": 38 + }, + { + "text": "\" Still, Kerr sees reason for optimism.", + "length": 39 + }, + { + "text": "So we are not good at speaking Putonghua.", + "length": 41 + }, + { + "text": "\"Why do we have so many problems with China?", + "length": 44 + }, + { + "text": "\"Even if the films don't work, someone tried.", + "length": 45 + }, + { + "text": "The two parties end up conversing in English.", + "length": 45 + }, + { + "text": "\"That's me,\" says Chan, referring to the girls.", + "length": 47 + }, + { + "text": "\"The drive to attain world adoration,\" Kerr says.", + "length": 49 + }, + { + "text": "\"The film industry was already almost dead in the '90s.", + "length": 55 + }, + { + "text": "\" Adds independent director Yan-yan Mak: \"We are monsters.", + "length": 58 + }, + { + "text": "' After 1997, we lost the confidence to be Hong Kong people.", + "length": 60 + }, + { + "text": "\"Koreans truly believe they're making great art all the time.", + "length": 61 + }, + { + "text": "By 2006, those figures slumped to 51 films grossing $37 million.", + "length": 64 + }, + { + "text": "But Mak points to a shortage of creativity in the local industry.", + "length": 65 + }, + { + "text": "' Gweilos [Hong Kong slang for Caucasians] say: 'You are Chinese.", + "length": 65 + }, + { + "text": "I'm proud to have grown up during the transition between 1997 and SAR.", + "length": 70 + }, + { + "text": "In the early 1990s, Hong Kong released around 
200 local features a year.", + "length": 72 + }, + { + "text": "\" In 2006, Hong Kong closed five small cinemas and re-opened one multiplex.", + "length": 75 + }, + { + "text": "Even in the cinema, we see Western movies, Japanese movies, Korean movies...", + "length": 76 + }, + { + "text": "\"On the other hand, it also commenced the opening up of the mainland market.", + "length": 76 + }, + { + "text": "\" How is South Korea's film industry different from its Hong Kong counterpart?", + "length": 78 + }, + { + "text": "Hong Kong already has a bad reputation for Putonghua, but I don't feel ashamed.", + "length": 79 + }, + { + "text": "Because our language, our culture, our values, our way of thinking are different.", + "length": 81 + }, + { + "text": "Stephen Chow's \"Shaolin Soccer\" is one of Hong Kong's all-time top-grossing films.", + "length": 82 + }, + { + "text": "Many Chinese say that now that Hong Kong is part of China, Hong Kong people must learn Putonghua.", + "length": 97 + }, + { + "text": "\"I speak English better than Putonghua [China's official common language, also known as Mandarin].", + "length": 98 + }, + { + "text": "\"Today production is down, with many majors like Chinastar and Golden Harvest scaling back,\" he says.", + "length": 101 + }, + { + "text": "\"No talents, no formal training, in most areas such as script-writing, directing, acting, etc,\" he says.", + "length": 104 + }, + { + "text": "At the same time, local film-makers have had to refocus their cameras for a new audience: mainland China.", + "length": 105 + }, + { + "text": "\" Elizabeth Kerr, film critic and curator formerly based in Seoul, South Korea, agrees with Youngs' assessment.", + "length": 111 + }, + { + "text": "\"Around 1997, like lots of Hong Kong people, I kind of lost myself,\" says independent film-maker Chan Wing-chiu.", + "length": 112 + }, + { + "text": "\"Most films are high-end productions with big stars, or low-end made with a shoestring budget for an easier 
return.", + "length": 115 + }, + { + "text": "\"Unfortunately, it seemed to be blinded by the 'golden age' of the '80s, where any film could easily rack in over $1.", + "length": 117 + }, + { + "text": "Some once video distributors like Mei Ah and Universe have gone into production as a way to keep the pipeline flowing.", + "length": 118 + }, + { + "text": "\" The new Chinese market has translated into an emphasis in contrasts of Hong Kong-made films, says film archivist Cheng.", + "length": 121 + }, + { + "text": "What needs not to be spelt out for film-makers venturing north is that one plays according to what goes with the territory.", + "length": 123 + }, + { + "text": "A trend of fewer films being produced each year in Hong Kong at the time of the handover has continued into the 21st century.", + "length": 125 + }, + { + "text": "\"Even the independent scene still needs more real talents -- or at least, real producers to pull together a really good project.", + "length": 128 + }, + { + "text": "Ten years ago, the top 10 grossing films accounted for 47 percent of the total box-office return; today, the portion is 58 percent.", + "length": 131 + }, + { + "text": "South Korea launched an active campaign on all levels -- corporate, government, education -- to train and cultivate a modern film industry.", + "length": 139 + }, + { + "text": "\"For all the risk-taking businessmen out there [in Hong Kong], no one is willing to put their money where their mouth is and throw in some support.", + "length": 147 + }, + { + "text": "HONG KONG, China (CNN) -- In the decade since the 1997 handover of Hong Kong to China, local movie-makers have faced daunting changes in the industry.", + "length": 150 + }, + { + "text": "\"1997, unfortunately, coincides with the beginning of the collapse of the local film industry -- a well-documented fact,\" says screenwriter Jimmy Ngai.", + "length": 151 + }, + { + "text": "\" Tim Youngs, Hong Kong consultant for Italy's Far East Film 
Festival, says changing tastes among Hong Kong movie-goers has also affected the industry.", + "length": 151 + }, + { + "text": "\"Audiences have become increasingly dismissive of local movies, often referring to them as poor quality, and there are much fewer paying cinemagoers these days.", + "length": 160 + }, + { + "text": "By 1997, that number dropped to 85 films grossing $69 million, according to the Hong Kong, Kowloon and New Territories Motion Picture Industry Association (MPIA).", + "length": 162 + }, + { + "text": "Films that best retain a Hong Kong style, Kerr maintains, likely carry \"Category III\" (under 18 not allowed) ratings: Movies that are \"grown up and smart,\" she says.", + "length": 165 + }, + { + "text": "\"The result is that the industry has grown more and more accustomed to looking north for both investment and box return -- nothing political, but more of a survival instinct.", + "length": 174 + }, + { + "text": "Gary Mak, director of Broadway Cinematheque -- Hong Kong's last-remaining alternative-screening venue -- remains optimistic about more adventurous programming and distribution.", + "length": 176 + }, + { + "text": "\"The industry for the most part suffers from the cleave between that fluff -- which makes money -- and the more adult film-making of the smaller studios, distributors and indies.", + "length": 178 + }, + { + "text": "\"The number of screens is also down, with the consolidation of more multiplexes, usually owned or partly owned by distributors, which already have a steady supply of foreign films to fill the screens.", + "length": 200 + }, + { + "text": "\" Chan's own first feature in 2005, \"A Side, B Side, Sea Side,\" includes a scene with a gaggle of girls on Hong Kong's Cheung Chau island who are unable to communicate in Chinese with an Australian man speaking fluent Mandarin.", + "length": 227 + }, + { + "text": "\"The Hong Kong film industry came to a rude awakening [in the late 1990s] that the world was changing 
faster than it was in the age of new delivery systems for home entertainment and the Internet,\" says Bede Cheng, a local film archivist and curator.", + "length": 250 + }, + { + "text": "People in Hong Kong's industry point to several causes for the comparatively leaner times: a lack of opportunities for new acting talent, inadequate training and schooling for people who produce movies and changing tastes within the Hong Kong public.", + "length": 250 + }, + { + "text": "\"So the hometown audience shows less support for local movies, whether by not seeing local films or opting for piracy, while the declining number of films means less opportunities for film-makers, fewer chances to try out new things, and damage to confidence.", + "length": 259 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5161052346229553 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:15.588394607Z", + "first_section_created": "2025-12-23T09:35:15.58871962Z", + "last_section_published": "2025-12-23T09:35:15.589164938Z", + "all_results_received": "2025-12-23T09:35:15.718133952Z", + "output_generated": "2025-12-23T09:35:15.718479466Z", + "total_processing_time_ms": 130, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 128, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:15.58871962Z", + "publish_time": "2025-12-23T09:35:15.589025832Z", + "first_worker_start": "2025-12-23T09:35:15.589544953Z", + "last_worker_end": "2025-12-23T09:35:15.709421Z", + "total_journey_time_ms": 120, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:15.589493751Z", + "start_time": "2025-12-23T09:35:15.589587955Z", + "end_time": "2025-12-23T09:35:15.589690659Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:15.589797Z", + 
"start_time": "2025-12-23T09:35:15.589974Z", + "end_time": "2025-12-23T09:35:15.709421Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 119 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:15.589631457Z", + "start_time": "2025-12-23T09:35:15.589685159Z", + "end_time": "2025-12-23T09:35:15.589791363Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:15.58946895Z", + "start_time": "2025-12-23T09:35:15.589544953Z", + "end_time": "2025-12-23T09:35:15.589583055Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:15.589063934Z", + "publish_time": "2025-12-23T09:35:15.589164938Z", + "first_worker_start": "2025-12-23T09:35:15.589680859Z", + "last_worker_end": "2025-12-23T09:35:15.717108Z", + "total_journey_time_ms": 128, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:15.589687159Z", + "start_time": "2025-12-23T09:35:15.589730961Z", + "end_time": "2025-12-23T09:35:15.589762862Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:15.589919Z", + "start_time": "2025-12-23T09:35:15.590102Z", + "end_time": "2025-12-23T09:35:15.717108Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 127 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:15.589629257Z", + "start_time": "2025-12-23T09:35:15.589680859Z", + "end_time": "2025-12-23T09:35:15.58972546Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:15.589599255Z", + "start_time": "2025-12-23T09:35:15.58971326Z", + "end_time": "2025-12-23T09:35:15.589727561Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + 
"sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 246, + "min_processing_ms": 119, + "max_processing_ms": 127, + "avg_processing_ms": 123, + "median_processing_ms": 127, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3217, + "slowest_section_id": 1, + "slowest_section_time_ms": 128 + } +} diff --git a/data/output/005804231660cfba04a959d6d88cae81d144c2e2.json b/data/output/005804231660cfba04a959d6d88cae81d144c2e2.json new file mode 100644 index 0000000..880751d --- /dev/null +++ b/data/output/005804231660cfba04a959d6d88cae81d144c2e2.json @@ -0,0 +1,194 @@ +{ + "file_name": "005804231660cfba04a959d6d88cae81d144c2e2.txt", + "total_words": 154, + "top_n_words": [ + { + "word": "of", + "count": 9 + }, + { + "word": "police", + "count": 9 + }, + { + "word": "the", + "count": 8 + }, + { + "word": "were", + "count": 5 + }, + { + "word": "in", + "count": 4 + }, + { + "word": "three", + "count": 4 + }, + { + "word": "a", + "count": 3 + }, + { + "word": "and", + "count": 3 + }, + { + "word": "on", + "count": 3 + }, + { + "word": "pakistan", + "count": 3 + } + ], + "sorted_sentences": [ + { + "text": 
"Journalist Nasir Habib contributed to this report .", + "length": 51 + }, + { + "text": "Three police officers were injured and parts of the buildings were damaged.", + "length": 75 + }, + { + "text": "Police said there was no immediate claim of responsibility for the attacks.", + "length": 75 + }, + { + "text": "Gujranwala has been largely free of the militant violence plaguing parts of Pakistan.", + "length": 85 + }, + { + "text": "On Monday, two people were injured when in back to back explosions at police stations in the southern port city of Karachi.", + "length": 123 + }, + { + "text": "They were detonated by remote control this morning in the city of Gujranwala, 70 kilometers northwest of Lahore, Dogar said.", + "length": 124 + }, + { + "text": "Islamabad, Pakistan (CNN) -- Bomb blasts in Pakistan rocked three police buildings near Lahore, police told CNN on Wednesday.", + "length": 125 + }, + { + "text": "The first two explosions happened within 30 minutes of one another, and the third bomb was detonated about three hours later.", + "length": 125 + }, + { + "text": "Police chief Ghulam Mehmood Dogar said the three bombs were planted on Tuesday night at a police station, an office of a senior police official and a police barracks.", + "length": 166 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7678933143615723 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:16.0903733Z", + "first_section_created": "2025-12-23T09:35:16.090680512Z", + "last_section_published": "2025-12-23T09:35:16.09087862Z", + "all_results_received": "2025-12-23T09:35:16.152797124Z", + "output_generated": "2025-12-23T09:35:16.152884127Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:16.090680512Z", + "publish_time": "2025-12-23T09:35:16.09087862Z", + 
"first_worker_start": "2025-12-23T09:35:16.091470044Z", + "last_worker_end": "2025-12-23T09:35:16.150817Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:16.091419642Z", + "start_time": "2025-12-23T09:35:16.091470044Z", + "end_time": "2025-12-23T09:35:16.091488345Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:16.091729Z", + "start_time": "2025-12-23T09:35:16.091884Z", + "end_time": "2025-12-23T09:35:16.150817Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:16.091424243Z", + "start_time": "2025-12-23T09:35:16.091483445Z", + "end_time": "2025-12-23T09:35:16.091514346Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:16.091503946Z", + "start_time": "2025-12-23T09:35:16.091551748Z", + "end_time": "2025-12-23T09:35:16.091564748Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + 
"wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 957, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/00584d3727e3b608c9dda8df7ea45c8148c11b51.json b/data/output/00584d3727e3b608c9dda8df7ea45c8148c11b51.json new file mode 100644 index 0000000..aadc082 --- /dev/null +++ b/data/output/00584d3727e3b608c9dda8df7ea45c8148c11b51.json @@ -0,0 +1,330 @@ +{ + "file_name": "00584d3727e3b608c9dda8df7ea45c8148c11b51.txt", + "total_words": 580, + "top_n_words": [ + { + "word": "the", + "count": 23 + }, + { + "word": "a", + "count": 20 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "to", + "count": 14 + }, + { + "word": "with", + "count": 13 + }, + { + "word": "she", + "count": 12 + }, + { + "word": "lindsay", + "count": 11 + }, + { + "word": "her", + "count": 9 + }, + { + "word": "is", + "count": 9 + }, + { + "word": "and", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "' Threads for rehab?", + "length": 20 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "think she has a problem.", + "length": 24 + }, + { + "text": "03:04 EST, 25 March 2013 .", + "length": 26 + }, + { + "text": "11:50 EST, 22 March 2013 .", + "length": 26 + }, + { + "text": "'Lindsay's pre-rehab plan?", + "length": 26 + }, + { + "text": "good head on his shoulders.", + "length": 27 + }, + { + "text": "Lindsay will undergo her sixth .", + "length": 32 + }, + { + "text": "basically, he knows who is and .", + "length": 32 + }, + { + "text": "Friends hope he will be a good .", + 
"length": 32 + }, + { + "text": "scene, he doesn't even drink much...", + "length": 36 + }, + { + "text": "likes control, he could help Lindsay.", + "length": 37 + }, + { + "text": "She must begin treatment before May 2.", + "length": 38 + }, + { + "text": "I kid you not,' a source told the website.", + "length": 42 + }, + { + "text": "The star picked up a few items at a vintage store .", + "length": 51 + }, + { + "text": "A week with Charlie Sheen in LA, then she wants to go to Brazil.", + "length": 64 + }, + { + "text": "influence on the troubled starlet, with one saying: 'Avi's not a .", + "length": 66 + }, + { + "text": "attempt at getting sober when she begins her court ordered stint in .", + "length": 69 + }, + { + "text": "and reliance on legal medications but the actress reportedly doesn't .", + "length": 70 + }, + { + "text": "rehab and her treatment will focus on her 'large' alcohol consumption .", + "length": 71 + }, + { + "text": "feckless party animal, he works in that industry, sure, but he's got a .", + "length": 72 + }, + { + "text": "“He's from a solid background and doesn't get involved with the drugs .", + "length": 73 + }, + { + "text": "She stopped in at the Gotta Have It vintage store where she bought some clothes.", + "length": 80 + }, + { + "text": "She was arrested in November for allegedly punching the woman inside Club Avenue.", + "length": 81 + }, + { + "text": "Of course, Venice was Lindsay's old stomping ground before she moved to the East Coast.", + "length": 87 + }, + { + "text": "It's the first time Lindsay has been pictured with Avi, who is a rocker with City of the Sun.", + "length": 93 + }, + { + "text": "The Mean Girls star was casually dressed in jeans, a grey vest and no bra, with her matted hair hanging loose.", + "length": 110 + }, + { + "text": "She'll soon be checking into a lock-down rehab facility, but Lindsay Lohan is certainly making the most of her freedom.", + "length": 119 + }, + { + "text": "However witnesses to 
the incident, including those connected with the alleged victim, refused to speak to investigators.", + "length": 120 + }, + { + "text": "Rehab is part of a plea deal agreed this week in Los Angeles Superior Court relating to charges over a July car accident.", + "length": 121 + }, + { + "text": "Meanwhile it also emerged she will not face charges despite being arrested over a New York City nightclub brawl with another woman.", + "length": 131 + }, + { + "text": "The 26-year-old was spotted in Venice, California on Friday with two male companions, including her rumoured new boyfriend Avi Snow.", + "length": 132 + }, + { + "text": "Law enforcement sources told TMZ the Manhattan district attorney's office will not prosecute Lohan because witnesses failed to cooperate.", + "length": 137 + }, + { + "text": "Lindsay and Charlie, 47, got to know each other when they filmed the upcoming comedy Scary Move 5, which is due to hit theatres on April 15.", + "length": 140 + }, + { + "text": "Not so romantic: Heller insists there's 'nothing serious' between Lindsay and musician Avi Snow, seen here with his band members from City of the Sun .", + "length": 151 + }, + { + "text": "Rehab-bound: Lindsay is to spend 90 days in an in-patient facility as part of a plea deal agreed in court in March 18 which relate to charges over a July car accident .", + "length": 168 + }, + { + "text": "In the meantime, Lindsay is reportedly going to spend a week with Charlie during which she will tape a guest appearance on his FX show Anger Management, according to Perezhilton.", + "length": 178 + }, + { + "text": "It is claimed another problem with the case is the woman she allegedly hit tripped and fell while being escorted out of the club by police, so it was unclear what really caused her injuries.", + "length": 190 + }, + { + "text": "Hanging loose: Lindsay Lohan was spotted in Venice, California, on Thursday with her rumoured new boyfriend Avi Snow (far right) Free spirit: The star, who was 
joined by another male companion, went without a bra .", + "length": 214 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.48064860701560974 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:16.591696667Z", + "first_section_created": "2025-12-23T09:35:16.593770151Z", + "last_section_published": "2025-12-23T09:35:16.593922257Z", + "all_results_received": "2025-12-23T09:35:16.665137636Z", + "output_generated": "2025-12-23T09:35:16.665320943Z", + "total_processing_time_ms": 73, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 71, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:16.593770151Z", + "publish_time": "2025-12-23T09:35:16.593922257Z", + "first_worker_start": "2025-12-23T09:35:16.594440878Z", + "last_worker_end": "2025-12-23T09:35:16.664227Z", + "total_journey_time_ms": 70, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:16.594439878Z", + "start_time": "2025-12-23T09:35:16.59450138Z", + "end_time": "2025-12-23T09:35:16.594564183Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:16.594667Z", + "start_time": "2025-12-23T09:35:16.594839Z", + "end_time": "2025-12-23T09:35:16.664227Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 69 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:16.594392276Z", + "start_time": "2025-12-23T09:35:16.594440878Z", + "end_time": "2025-12-23T09:35:16.594531581Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:16.594438478Z", + "start_time": "2025-12-23T09:35:16.59449458Z", + "end_time": "2025-12-23T09:35:16.594520181Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + 
"sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 69, + "min_processing_ms": 69, + "max_processing_ms": 69, + "avg_processing_ms": 69, + "median_processing_ms": 69, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3277, + "slowest_section_id": 0, + "slowest_section_time_ms": 70 + } +} diff --git a/data/output/0058785aa3eaaa3f45953d3c98f797900fdeb583.json b/data/output/0058785aa3eaaa3f45953d3c98f797900fdeb583.json new file mode 100644 index 0000000..1702292 --- /dev/null +++ b/data/output/0058785aa3eaaa3f45953d3c98f797900fdeb583.json @@ -0,0 +1,428 @@ +{ + "file_name": "0058785aa3eaaa3f45953d3c98f797900fdeb583.txt", + "total_words": 1031, + "top_n_words": [ + { + "word": "and", + "count": 41 + }, + { + "word": "the", + "count": 39 + }, + { + "word": "a", + "count": 35 + }, + { + "word": "of", + "count": 25 + }, + { + "word": "to", + "count": 25 + }, + { + "word": "yoga", + "count": 20 + }, + { + "word": "i", + "count": 17 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "it", + "count": 16 + }, + { + "word": "with", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "| .", + 
"length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "lustfestival.", + "length": 13 + }, + { + "text": "Ruby Warrington .", + "length": 17 + }, + { + "text": "Cue more waterworks.", + "length": 20 + }, + { + "text": "16:07 EST, 10 August 2013 .", + "length": 27 + }, + { + "text": "16:00 EST, 10 August 2013 .", + "length": 27 + }, + { + "text": "The Wanderlust festival (wander .", + "length": 33 + }, + { + "text": "Each pose is linked with one breath.", + "length": 36 + }, + { + "text": "Focus on relaxation and self-awareness.", + "length": 39 + }, + { + "text": "Said to burn up to 600 calories a class.", + "length": 40 + }, + { + "text": "And there are tears streaming down my face.", + "length": 43 + }, + { + "text": "It means there is a class  for everyone now.", + "length": 45 + }, + { + "text": "Yoga originated in India, but has become mainstream.", + "length": 52 + }, + { + "text": "Said to develop cardiovascular fitness and body strength.", + "length": 57 + }, + { + "text": "But on my mat in Vermont, I experience a yogic awakening.", + "length": 57 + }, + { + "text": "Cuts stress, improves breathing and builds back strength.", + "length": 57 + }, + { + "text": "com), now in its third year, travels to six different sites .", + "length": 61 + }, + { + "text": "So this is the mind, body and spirit effect that people talk about.", + "length": 67 + }, + { + "text": "in the United States during the summer (it also went to Chile this .", + "length": 68 + }, + { + "text": "year), and is an example of how mainstream yoga has become in America.", + "length": 70 + }, + { + "text": "Not to mention the lengthening, tightening effect it has on your body.", + "length": 70 + }, + { + "text": "lower-back pain found less disability,  pain and depression after six months.", + "length": 78 + }, + { + "text": "The class I’ve just finished was titled Radiant Body: 
Your Practise As Medicine.", + "length": 82 + }, + { + "text": "And judging from the sobs and sniffles on the mats around me, I’m not the only one.", + "length": 85 + }, + { + "text": "Mind bent: Ruby Warrington has fallen for traditional yoga after attending Wanderlust .", + "length": 87 + }, + { + "text": "Bikram: A class in a sauna-like room where attendees go twice through a series of 26 postures.", + "length": 94 + }, + { + "text": "Iyengar: Suitable for all ages and abilities and uses equipment such as foam blocks and straps.", + "length": 95 + }, + { + "text": "A small study of children found they lost weight and felt less anxiety after 12 weeks of ashtanga yoga.", + "length": 103 + }, + { + "text": "Take the Detox Flow Yoga session, taught by a woman many regard  as the high priestess of yoga, Seane Corn.", + "length": 108 + }, + { + "text": "But after my weekend of healing, I can finally see the point of more gentle, breathing-focused schools of yoga.", + "length": 111 + }, + { + "text": "Mind over mats: A mass yoga class at a previous Wanderlust festival - far from the muddy rock events of the UK .", + "length": 112 + }, + { + "text": "I even ate lunch next to a couple who had brought a nanny to watch the kids while they took back-to-back classes.", + "length": 113 + }, + { + "text": "Kundalini: The ‘yoga  of awareness’, it combines breathing exercises, postures, chanting mantras and meditation.", + "length": 117 + }, + { + "text": "I have always approached yoga from a purely athletic standpoint, seeing it mainly as an opportunity for a good stretch.", + "length": 119 + }, + { + "text": "As for my teary experience on the mat, once the floodgates had opened, I could barely get through a class without crying.", + "length": 121 + }, + { + "text": "A photographer who specialises in nudes once told me that without doubt, the best bodies she saw naked were the yoga bodies.", + "length": 124 + }, + { + "text": "And much to the distaste of the purists, all sorts 
of styles of yoga have been developed to suit different bodies and tastes.", + "length": 125 + }, + { + "text": "But if I was expecting a practical guide to easing physical aches and pains, it’s evidently aimed more at emotional therapy.", + "length": 126 + }, + { + "text": "Hatha: An umbrella  term – Hatha means a gentle, traditional approach with basic postures, meditation and breathing exercises.", + "length": 129 + }, + { + "text": "There are myriad studies to show yoga can help with everything from osteoarthritis and asthma to stress and chronic lower back pain.", + "length": 132 + }, + { + "text": "But I can’t stop thinking about a very different kind of festival – and what feels like only the beginning  of my yogic journey.", + "length": 133 + }, + { + "text": "It has also been proven to be useful during pregnancy and labour, and in improving cognitive function and perceived stress during the menopause.", + "length": 144 + }, + { + "text": "A 2012 study discovered that bikram was associated with better sleep, while other research found it increased strength and flexibility and decreased weight.", + "length": 156 + }, + { + "text": "Ashtanga: Fast- moving, demanding yoga comprising six sequences of poses, which become progressively harder – participants advance as their skill develops.", + "length": 157 + }, + { + "text": "In the UK, as many as a million people regularly do yoga, and September is National Yoga Month when more than 2,200 studios will offer a week of free trial classes.", + "length": 164 + }, + { + "text": "The following weekend, feeling nostalgic for summertime back the UK, my husband and I catch up with friends who regale us with tales of the rock festivals they’ve been to.", + "length": 173 + }, + { + "text": "I am lying on my back in a field amid hundreds of people in stretchy Lycra clothing at the Wanderlust yoga festival in Vermont – imagine a mini Glastonbury, but with added headstands.", + "length": 185 + }, + { + "text": "My favourite 
kind of yoga has always been Bikram, the insanely hot class that makes you sweat buckets and gives you muscles like Popeye, because to me it felt like a ‘proper’ workout.", + "length": 187 + }, + { + "text": "It’s estimated that 20 million people in the States now regularly hit the mat, while spending on yoga related products reached £17 billion last year – up 87 per cent in five years.", + "length": 189 + }, + { + "text": "If I was expecting a dreadlocked crowd of blissed-out hippies, the Vermont event seemed mainly to attract affluent New Yorkers decked out in trendy yoga label Lululemon Athletica (which plans to launch in the UK later this year).", + "length": 229 + }, + { + "text": "The waterworks began when we moved into a deeper stretch and it became obvious that getting over it once and for all would mean practising some forgiveness and gratitude – it was time to forgive myself for allowing it to happen, and to thank him for showing me what I didn’t want from a relationship.", + "length": 304 + }, + { + "text": "As she lectured a 400-strong class of mildly hung-over yogis about how we often use alcohol and other substances as an emotional crutch, it became very clear I do exactly that – and that my love affair with booze began when I needed Dutch courage to extricate myself from the aforementioned bad romance.", + "length": 305 + }, + { + "text": "Moments earlier, when the teacher asked us to visualise an area of our life that needed healing, a vivid impression of the ex-boyfriend I hadn’t thought about for years appeared in my mind, and with it a profound realisation that I never properly dealt with the fallout from that destructive relationship.", + "length": 307 + }, + { + "text": "Meanwhile, my lunch might have been a vegan, gluten-free pizza, but evening entertainment also included ‘Winederlust’ – a bar serving organic and biodynamic wine – and a lavish, four-course ‘farm-to-table’ feast (with steak and home-made limoncello), tickets for which cost $85 
each and sold out way ahead.", + "length": 318 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4703902006149292 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:17.094364088Z", + "first_section_created": "2025-12-23T09:35:17.094742103Z", + "last_section_published": "2025-12-23T09:35:17.095247624Z", + "all_results_received": "2025-12-23T09:35:17.180635276Z", + "output_generated": "2025-12-23T09:35:17.180845584Z", + "total_processing_time_ms": 86, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 85, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:17.094742103Z", + "publish_time": "2025-12-23T09:35:17.095079217Z", + "first_worker_start": "2025-12-23T09:35:17.095454332Z", + "last_worker_end": "2025-12-23T09:35:17.180109Z", + "total_journey_time_ms": 85, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:17.095478333Z", + "start_time": "2025-12-23T09:35:17.095559936Z", + "end_time": "2025-12-23T09:35:17.095673541Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:17.095754Z", + "start_time": "2025-12-23T09:35:17.095948Z", + "end_time": "2025-12-23T09:35:17.180109Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 84 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:17.095463232Z", + "start_time": "2025-12-23T09:35:17.095531135Z", + "end_time": "2025-12-23T09:35:17.095695042Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:17.095386529Z", + "start_time": "2025-12-23T09:35:17.095454332Z", + "end_time": "2025-12-23T09:35:17.095499634Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": 
"2025-12-23T09:35:17.09516492Z", + "publish_time": "2025-12-23T09:35:17.095247624Z", + "first_worker_start": "2025-12-23T09:35:17.095561536Z", + "last_worker_end": "2025-12-23T09:35:17.178245Z", + "total_journey_time_ms": 83, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:17.095586637Z", + "start_time": "2025-12-23T09:35:17.095616539Z", + "end_time": "2025-12-23T09:35:17.09565824Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:17.096006Z", + "start_time": "2025-12-23T09:35:17.096138Z", + "end_time": "2025-12-23T09:35:17.178245Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 82 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:17.09564054Z", + "start_time": "2025-12-23T09:35:17.095687842Z", + "end_time": "2025-12-23T09:35:17.095724543Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:17.095524535Z", + "start_time": "2025-12-23T09:35:17.095561536Z", + "end_time": "2025-12-23T09:35:17.095573937Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 166, + "min_processing_ms": 82, + "max_processing_ms": 84, + "avg_processing_ms": 83, + "median_processing_ms": 84, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3033, + "slowest_section_id": 0, + "slowest_section_time_ms": 85 + } +} diff --git a/data/output/0058a370ff12386a55b24ee56dd277e9c3b23a72.json b/data/output/0058a370ff12386a55b24ee56dd277e9c3b23a72.json new file mode 100644 index 0000000..4571042 --- /dev/null +++ b/data/output/0058a370ff12386a55b24ee56dd277e9c3b23a72.json @@ -0,0 +1,226 @@ +{ + "file_name": "0058a370ff12386a55b24ee56dd277e9c3b23a72.txt", + "total_words": 407, + "top_n_words": [ + { + "word": "the", + "count": 24 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "i", + "count": 8 + }, + { + "word": "in", + "count": 7 + }, + { + "word": "with", + "count": 7 + }, + { + "word": "is", + "count": 6 + }, + { + "word": "it", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "But sometimes it hurts people.", + "length": 30 + }, + { + "text": "\"Of course it's a newspaper trying to say something.", + "length": 52 + }, + { + "text": "\"Of course I have a friend in Paris, I have people who I work with.", + "length": 67 + }, + { + "text": "\" \"Everybody has his point of view -- everybody has something to say about that,\" Toure added.", + "length": 94 + }, + { + "text": "\"I feel very sorry for the families -- they lost their friend, father, or their husband, you know.", + "length": 98 + }, + { + "text": "\"As a Muslim I always believe in the way people can say what they want to say,\" he told CNN's Amanda Davies.", + "length": 108 + }, + { + "text": "While Toure -- a Muslim himself 
-- advocates freedom of speech, he feels news outlets also have responsibilities.", + "length": 113 + }, + { + "text": "\"When you hear something like that it's a bit disappointing,\" he said, referring to the attack on the Charlie Hebdo office.", + "length": 123 + }, + { + "text": "You get a bit confused, a bit afraid, because, as a Muslim, I have a friend and they are Muslim as well and I'm afraid of what is going to happen.", + "length": 146 + }, + { + "text": "\" Depicting Mohammed is offensive to many Muslims and the magazine's past cartoons of the prophet apparently motivated the attackers in last week's slaughter.", + "length": 158 + }, + { + "text": "\" The new cover was met with mixed emotions -- with some calling it a bold example of free speech and others criticizing it as needlessly offensive to Muslims.", + "length": 159 + }, + { + "text": "The City midfielder says he was disappointed to hear of the attack on Charlie Hebdo -- and he now fears for the safety of his Muslim friends in the French capital.", + "length": 163 + }, + { + "text": "(CNN)Manchester City and Ivory Coast football star Yaya Toure has called on the media to be more respectful in its attitude towards religion after the Charlie Hebdo attack.", + "length": 172 + }, + { + "text": "Currently on international duty with Ivory Coast ahead of the 2015 Africa Cup of Nations which begins this week, Toure spent a season playing in the French Ligue 1 with Monaco in 2006-07.", + "length": 187 + }, + { + "text": "\"But for me, the most important thing is that we know something that sometimes the newspaper is doing a lot -- and they're trying to do too much, and sometimes they do it not with respect.", + "length": 188 + }, + { + "text": "Twelve people were killed by brothers Said and Cherif Kouachi at the French satirical magazine's offices last week, with the gunmen reportedly yelling, \"We have avenged the Prophet\" while carrying out the attack.", + "length": 212 + }, + { + "text": "\" The latest 
Charlie Hebdo issue has also been highly controversial, largely because on its cover is an illustration of a tearful Prophet Mohammed, holding up an \"I am Charlie\" sign accompanied by the words \"All is forgiven.", + "length": 224 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6987272500991821 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:17.596015368Z", + "first_section_created": "2025-12-23T09:35:17.596294979Z", + "last_section_published": "2025-12-23T09:35:17.596464586Z", + "all_results_received": "2025-12-23T09:35:17.659900151Z", + "output_generated": "2025-12-23T09:35:17.660043657Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:17.596294979Z", + "publish_time": "2025-12-23T09:35:17.596464586Z", + "first_worker_start": "2025-12-23T09:35:17.597076911Z", + "last_worker_end": "2025-12-23T09:35:17.656998Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:17.597032309Z", + "start_time": "2025-12-23T09:35:17.597091212Z", + "end_time": "2025-12-23T09:35:17.597127013Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:17.597291Z", + "start_time": "2025-12-23T09:35:17.597418Z", + "end_time": "2025-12-23T09:35:17.656998Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:17.597036609Z", + "start_time": "2025-12-23T09:35:17.597104912Z", + "end_time": "2025-12-23T09:35:17.597159014Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:17.597018109Z", + "start_time": 
"2025-12-23T09:35:17.597076911Z", + "end_time": "2025-12-23T09:35:17.597093812Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2306, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/0059bb8b2718a83dfffa071bfbc6b49c2bc0ed13.json b/data/output/0059bb8b2718a83dfffa071bfbc6b49c2bc0ed13.json new file mode 100644 index 0000000..b289f8b --- /dev/null +++ b/data/output/0059bb8b2718a83dfffa071bfbc6b49c2bc0ed13.json @@ -0,0 +1,608 @@ +{ + "file_name": "0059bb8b2718a83dfffa071bfbc6b49c2bc0ed13.txt", + "total_words": 1457, + "top_n_words": [ + { + "word": "the", + "count": 60 + }, + { + "word": "to", + "count": 53 + }, + { + "word": "and", + "count": 40 + }, + { + "word": "i", + "count": 40 + }, + { + "word": "a", + "count": 37 + }, + { + "word": "it", + "count": 33 + }, + { + "word": "we", + "count": 26 + 
}, + { + "word": "of", + "count": 25 + }, + { + "word": "s", + "count": 23 + }, + { + "word": "in", + "count": 21 + } + ], + "sorted_sentences": [ + { + "text": "tietheknot.", + "length": 11 + }, + { + "text": "\" And we both do.", + "length": 17 + }, + { + "text": "Is that the case?", + "length": 17 + }, + { + "text": "\" Mikita said yes.", + "length": 18 + }, + { + "text": "\"Do you want kids?", + "length": 18 + }, + { + "text": "Julie (Bowen) had twins.", + "length": 24 + }, + { + "text": "CNN: When you go to www.", + "length": 24 + }, + { + "text": "So I just say \"boyfriend.", + "length": 25 + }, + { + "text": "Ferguson: I hate \"lover\"!", + "length": 25 + }, + { + "text": "seems to go beyond the set.", + "length": 27 + }, + { + "text": "Ferguson: About once a day!", + "length": 27 + }, + { + "text": "Do you turn into Papa Bear?", + "length": 27 + }, + { + "text": "Sofia (Vergara) got engaged.", + "length": 28 + }, + { + "text": "It's also very tough for me.", + "length": 28 + }, + { + "text": "Ty (Burell) has had two kids.", + "length": 29 + }, + { + "text": "Ferguson: I mean it really is.", + "length": 30 + }, + { + "text": "It's incredibly hard to swallow.", + "length": 32 + }, + { + "text": "Ferguson: We're in a great place.", + "length": 33 + }, + { + "text": "We're watching our families grow.", + "length": 33 + }, + { + "text": "We're in the process of planning it.", + "length": 36 + }, + { + "text": "So we came up with the bow-tie line.", + "length": 36 + }, + { + "text": "I'm also really excited to be married.", + "length": 38 + }, + { + "text": "CNN: It definitely gets your attention.", + "length": 39 + }, + { + "text": "It's one of those early date questions.", + "length": 39 + }, + { + "text": "We just want to add to who has the right.", + "length": 41 + }, + { + "text": "I don't want to jump into kids right away.", + "length": 42 + }, + { + "text": "It doesn't sound English or American at all.", + "length": 44 + }, + { + "text": "Ferguson: It's 
something we've talked about.", + "length": 44 + }, + { + "text": "I don't want to have a three-year engagement.", + "length": 45 + }, + { + "text": "CNN: Have you decided about starting a family?", + "length": 46 + }, + { + "text": "Ferguson: I have such mixed feelings about it.", + "length": 46 + }, + { + "text": "But, yeah, we both really want kids very much.", + "length": 46 + }, + { + "text": "I've gotten engaged since meeting these people.", + "length": 47 + }, + { + "text": "I grew up Catholic but don't practice any longer.", + "length": 49 + }, + { + "text": "\" I imagine your cast mates have been supportive.", + "length": 49 + }, + { + "text": "I don't want to be a professional engaged person.", + "length": 49 + }, + { + "text": "CNN: I think people will appreciate the symbolism.", + "length": 50 + }, + { + "text": "Also, it matches your character in \"Modern Family.", + "length": 50 + }, + { + "text": "I proposed to Justin because I wanted to be married.", + "length": 52 + }, + { + "text": "Ferguson and Mitka started the Tie the Knot foundation.", + "length": 55 + }, + { + "text": "It's the same thing as women wanting the right to vote.", + "length": 55 + }, + { + "text": "I bet you and Justin get asked a lot when the big day is.", + "length": 57 + }, + { + "text": "We thought -- why don't we kill two birds with one stone?", + "length": 57 + }, + { + "text": "It was Justin's idea to incorporate it into the foundation.", + "length": 59 + }, + { + "text": "Obviously, I'm so happy these states won marriage equality.", + "length": 59 + }, + { + "text": "So we all rally around one another and protect one another.", + "length": 59 + }, + { + "text": "During a recent trip to Mexico, Ferguson \"popped the big Q.", + "length": 59 + }, + { + "text": "\" I think there's something really sweet and innocent about it.", + "length": 63 + }, + { + "text": "It's exactly what you would expect from people who have your back.", + "length": 66 + }, + { + "text": "Nobody's 
looking to change the meaning of what it means to be married.", + "length": 70 + }, + { + "text": "Why did you pick this accessory as the cornerstone of your foundation?", + "length": 70 + }, + { + "text": "Ferguson: Well, we're not going to be redefining the marriage ceremony.", + "length": 71 + }, + { + "text": "CNN: I can only imagine what a \"Modern Family\" gay wedding will be like.", + "length": 72 + }, + { + "text": "CNN: Are you planning something more traditional or, dare I say, modern?", + "length": 72 + }, + { + "text": "CNN: My favorite part was your struggle with labeling your relationship.", + "length": 72 + }, + { + "text": "But one stumbling block is the lack of education about marriage equality.", + "length": 73 + }, + { + "text": "The \"Modern Family\" actor recently spoke with CNN about his organization.", + "length": 73 + }, + { + "text": "Its mission is simple: sell bow ties to raise money for marriage equality.", + "length": 74 + }, + { + "text": "It could be as early as this spring, and it could be the following spring.", + "length": 74 + }, + { + "text": "I feel like there's a fear that the definition of marriage will be changed.", + "length": 75 + }, + { + "text": "It's going to be a very nontraditional, very organic, very simple ceremony.", + "length": 75 + }, + { + "text": "I love calling Justin \"the lover I've taken on,\" but he doesn't care for that.", + "length": 78 + }, + { + "text": "We're kind of on this roller coaster together, and it's a very bonding experience.", + "length": 82 + }, + { + "text": "We're not going to have a wedding party or try to find someone amazing to officiate.", + "length": 84 + }, + { + "text": "It couldn't have been an easy decision to put your private life out there like that.", + "length": 84 + }, + { + "text": "But I'm just excited to get married and sit in the marriage place for a little while.", + "length": 85 + }, + { + "text": "There's a forward movement on this issue, and for many young Americans 
it's a nonissue.", + "length": 87 + }, + { + "text": "I think it will be a very short, very sweet and simple and hopefully beautiful ceremony.", + "length": 88 + }, + { + "text": "I just feel like it shouldn't be up to the majority to vote on a minority's civil rights.", + "length": 89 + }, + { + "text": "This is America and (marriage equality) should be part of the \"United\" part of our country.", + "length": 91 + }, + { + "text": "But, in the context of our foundation, it felt like (it was) the right time to tell people.", + "length": 91 + }, + { + "text": "We'll start a bow-tie line and funnel the proceeds into a foundation for marriage equality.", + "length": 91 + }, + { + "text": "I've always found the term fiance in gay or straight relationships to be completely strange.", + "length": 92 + }, + { + "text": "I'm thrilled that we are slowly making progress, and we have to make progress however we can.", + "length": 93 + }, + { + "text": "org, the first thing you see is a hilarious video of you and Justin announcing your engagement.", + "length": 95 + }, + { + "text": "Then Justin came up with the idea of Tie the Knot, which just perfectly marries those two ideas.", + "length": 96 + }, + { + "text": "CNN: And whether you're voting or getting married, who doesn't like to wear a nice bow tie, right?", + "length": 98 + }, + { + "text": "We're always checking in with one another, and some walls definitely go up to protect some people.", + "length": 98 + }, + { + "text": "I went through it with Proposition 8 (in California), seeing the majority vote on the minority's rights.", + "length": 104 + }, + { + "text": "But I do look forward to the day we stop putting it in the hands of the states and make it a national thing.", + "length": 108 + }, + { + "text": "We're kind of waiting for some pieces to fall into place and to see what my work schedule is like next summer.", + "length": 110 + }, + { + "text": "Ferguson is using his for a cause that's both political and 
personal: the fight to legalize same-sex marriage.", + "length": 110 + }, + { + "text": "After dating for more than two years, Ferguson and his boyfriend, Justin Mikita, decided to take the next step.", + "length": 111 + }, + { + "text": "People are always trying to test us and break us and find out the darkness, but there's really nothing to tell.", + "length": 111 + }, + { + "text": "It made me very nervous; I didn't want to exploit something that was very personal and private between Justin and me.", + "length": 117 + }, + { + "text": "So I thought about what I like to wear and also what is literally the smallest piece of clothing I could possibly design.", + "length": 121 + }, + { + "text": "CNN: I bet when one of your family members finds themselves at the center of a crazy headline or serious crisis you all react.", + "length": 126 + }, + { + "text": "Ferguson: I selfishly wanted to get involved in the fashion world anyway, but in a way that didn't feel like a huge undertaking.", + "length": 128 + }, + { + "text": "Ferguson: We kind of felt like the only way to legitimize why we wanted to do this was to announce that we were actually engaged.", + "length": 129 + }, + { + "text": "The 37-year-old Montana native doesn't just play Mitch, a gay man in a loving, committed relationship on TV; in a way, he is Mitch.", + "length": 131 + }, + { + "text": "I haven't been to the Catholic Church in years so I would feel really weird to try and bring in those traditions -- it's just not us.", + "length": 133 + }, + { + "text": "CNN: What do you see as the biggest challenge between where things are now and the protection of same-sex marriage under federal law?", + "length": 133 + }, + { + "text": "(CNN) -- The phenomenal success of \"Modern Family\" has been a game changer for its entire cast, including actor Jesse Tyler Ferguson.", + "length": 133 + }, + { + "text": "They weren't going to change the meaning of going to the polls and putting the card in the ballot; they 
just wanted the right to vote.", + "length": 134 + }, + { + "text": "I think it sounds so pretentious and like that \"Saturday Night Live\" skit with Will Ferrell and Rachel Dratch in the hot tub eating turkey.", + "length": 139 + }, + { + "text": "The show's high ratings, three consecutive Emmy wins for outstanding comedy series and broad fan base have given all its stars a massive platform.", + "length": 146 + }, + { + "text": "Ferguson: There's obviously some sense of protection because we know being (in) the media's eye how vulnerable that can be when you're going through something.", + "length": 159 + }, + { + "text": "I don't even have any gay friends who have gotten married, but several of my straight friends had very untraditional weddings, and those have always been my favorite.", + "length": 166 + }, + { + "text": "We'll go to a birthday party or get together at someone's house and bring our husbands, wives, fiances, boyfriends and girlfriends, and it feels like a huge extended family.", + "length": 173 + }, + { + "text": "Jesse Tyler Fergusson: I feel like when you tackle any subject with comedy, humor and wits, you're going to get a lot further than if you just give the dry facts of the cause.", + "length": 175 + }, + { + "text": "CNN: A lot of boyfriends, and girlfriends in Maine, Maryland and Washington got some good news on Election Day when same-sex marriage was legalized in those states by popular vote.", + "length": 180 + }, + { + "text": "After much consideration and debate, the newly engaged couple decided to go public with their private news, not because they don't enjoy their privacy -- they certainly do -- but because in doing so they knew they could shine a light on a cause dear to their hearts.", + "length": 266 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.4628065675497055 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:18.097266232Z", + "first_section_created": 
"2025-12-23T09:35:18.097643947Z", + "last_section_published": "2025-12-23T09:35:18.098077065Z", + "all_results_received": "2025-12-23T09:35:18.20853363Z", + "output_generated": "2025-12-23T09:35:18.208867044Z", + "total_processing_time_ms": 111, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 110, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:18.097643947Z", + "publish_time": "2025-12-23T09:35:18.097896358Z", + "first_worker_start": "2025-12-23T09:35:18.098473681Z", + "last_worker_end": "2025-12-23T09:35:18.207673Z", + "total_journey_time_ms": 110, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:18.098484681Z", + "start_time": "2025-12-23T09:35:18.098556884Z", + "end_time": "2025-12-23T09:35:18.098647688Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:18.098721Z", + "start_time": "2025-12-23T09:35:18.09888Z", + "end_time": "2025-12-23T09:35:18.207673Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 108 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:18.098439579Z", + "start_time": "2025-12-23T09:35:18.098507282Z", + "end_time": "2025-12-23T09:35:18.098609786Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:18.098405278Z", + "start_time": "2025-12-23T09:35:18.098473681Z", + "end_time": "2025-12-23T09:35:18.098522883Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:18.09794976Z", + "publish_time": "2025-12-23T09:35:18.098077065Z", + "first_worker_start": "2025-12-23T09:35:18.098562084Z", + "last_worker_end": "2025-12-23T09:35:18.200816Z", + "total_journey_time_ms": 102, + "worker_timings": { + "sentencesort": { + 
"worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:18.098611986Z", + "start_time": "2025-12-23T09:35:18.098660888Z", + "end_time": "2025-12-23T09:35:18.098715291Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:18.098726Z", + "start_time": "2025-12-23T09:35:18.098868Z", + "end_time": "2025-12-23T09:35:18.200816Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 101 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:18.098717491Z", + "start_time": "2025-12-23T09:35:18.098764393Z", + "end_time": "2025-12-23T09:35:18.098845596Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:18.098514983Z", + "start_time": "2025-12-23T09:35:18.098562084Z", + "end_time": "2025-12-23T09:35:18.098587685Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 209, + "min_processing_ms": 101, + "max_processing_ms": 108, + "avg_processing_ms": 104, + "median_processing_ms": 108, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3904, + "slowest_section_id": 0, + "slowest_section_time_ms": 110 + } +} diff --git a/data/output/0059cb4d42ae552f79365af88aca13c1d14280cf.json b/data/output/0059cb4d42ae552f79365af88aca13c1d14280cf.json new file mode 100644 index 0000000..42923c3 --- /dev/null +++ b/data/output/0059cb4d42ae552f79365af88aca13c1d14280cf.json @@ -0,0 +1,338 @@ +{ + "file_name": "0059cb4d42ae552f79365af88aca13c1d14280cf.txt", + "total_words": 708, + "top_n_words": [ + { + "word": "the", + "count": 22 + }, + { + "word": "a", + "count": 20 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "he", + "count": 18 + }, + { + "word": "dudley", + "count": 14 + }, + { + "word": "we", + "count": 14 + }, + { + "word": "but", + "count": 13 + }, + { + "word": "dawes", + "count": 10 + }, + { + "word": "in", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Jennifer Smith .", + "length": 16 + }, + { + "text": "'He's certainly heavy.", + "length": 22 + }, + { + "text": "07:42 EST, 25 August 2013 .", + "length": 27 + }, + { + "text": "10:33 EST, 25 August 2013 .", + "length": 27 + }, + { + "text": "'Now, he comes up to my waist!", + "length": 30 + }, + { + "text": "Obviously the dad must have been a lot bigger!", + "length": 46 + }, + { + "text": "'But other than that he's a big friendly giant.", + "length": 47 + }, + { + "text": "'The only thing we need to be wary of is shoelaces.", + "length": 51 + }, + { + "text": "'Most have heard he loves bananas and bring a bunch along.", + "length": 58 + }, + { + "text": "He stood on my toe the other week and it was black and blue!", + "length": 60 + }, + { + "text": "'At first we were shocked 
by his growth, but we're used to it now.", + "length": 66 + }, + { + "text": "I'm not sure of his exact weight but he must be at least 20 stone.", + "length": 66 + }, + { + "text": "The pampered pig enjoys 'cuddling in and lying in front of the TV'.", + "length": 67 + }, + { + "text": "Micro pigs are fully grown at two years old and weigh just two stone.", + "length": 69 + }, + { + "text": "Dudley has made an unlikely friend in Dexter, the family's bichon frise.", + "length": 72 + }, + { + "text": "But as time went on she began to realise he wasn't a micro pig after all .", + "length": 74 + }, + { + "text": "'I've loved pigs since I was young and always said I was going to get one.", + "length": 74 + }, + { + "text": "When Mrs Dawes bought Dudley she didn't expect him to grow past two stone .", + "length": 75 + }, + { + "text": "Despite his surprising growth, Dudley is part of the family says Ms Dawes .", + "length": 75 + }, + { + "text": "Dudley is fully house trained and enjoys cuddling up to watch TV with his owner .", + "length": 81 + }, + { + "text": "He likes to chew them so you might end up with him accidentally nibbling your leg.", + "length": 82 + }, + { + "text": "'It's probably just as well because by then we were too attached to part with him.", + "length": 82 + }, + { + "text": "His favourite thing to eat are bananas, but he enjoys scoffing all types of fruit and veg.", + "length": 90 + }, + { + "text": "\"We did contact the breeder when we realised he wasn't 'micro' but we never got a response.", + "length": 91 + }, + { + "text": "'Now, he sticks to the back - but we still have people asking if they can meet our pet pig.", + "length": 91 + }, + { + "text": "'He's fully house trained, can give you a trotter when you ask and closes doors behind him.", + "length": 91 + }, + { + "text": "'People think we're crazy having a huge pig in the house, but everyone who meets Dudley is smitten.", + "length": 99 + }, + { + "text": "But three years later, pet pig 
Dudley weighs over 20 stone, and measures 5ft 8in from nose to curly tail.", + "length": 105 + }, + { + "text": "'We thought Dudley had reached full-size when he got level with the coffee table, but he kept getting bigger.", + "length": 109 + }, + { + "text": "Though Ms Dawes and her Dexter are used to her unusual companion, Dudley still causes quite a stir around town.", + "length": 111 + }, + { + "text": "Once she realised Dudley wasn't a micro pig after all, Ms Dawes contacted the dishonest breeder but received no response .", + "length": 122 + }, + { + "text": "Dudley as a piglet: After Ms Dawes bought the pig as a 50th birthday treat, she thought he would remain a small size forever.", + "length": 125 + }, + { + "text": "So as Dudley continued to get bigger and bigger, it didn't take long for Ms Dawes to realise she'd been duped by the breeder.", + "length": 125 + }, + { + "text": "Dexter, the household dog, has been feeling left out as Dudley attracts so much attention when stood out in the front garden .", + "length": 126 + }, + { + "text": "So when I spotted an advert for micro pigs, I thought that would be perfect, he could live with us like a dog,' said Ms Dawes.", + "length": 126 + }, + { + "text": "Having wanted a piglet her whole life, Catherine Dawes was thrilled to buy what she thought was a micro-pig for her 50th birthday.", + "length": 130 + }, + { + "text": "After paying over £500 for the tiny animal, Ms Dawes from Blantyre, Lanarkshire, never expected to see it grow bigger than a small dog.", + "length": 136 + }, + { + "text": "'When we picked him up he was about nine weeks old and a bit bigger than I was expecting, but we saw his mother and she was pretty small.", + "length": 137 + }, + { + "text": "Ms Dawes and her partner, Colin, travelled to Nottingham to pick up Dudley after a breeder had assured them he was what they were looking for.", + "length": 142 + }, + { + "text": "Dudley, who is fully house trained, stays in the house during the day 
and sleeps in a coal shed in the garden at night, complete with a straw bed and heat lamp.", + "length": 160 + }, + { + "text": "'We used to put him out in the front garden, but we'd have lines of school buses stopping outside and cars nearly crashing as the drivers were too busy looking at this huge pig running around,' she laughed.", + "length": 206 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.39114177227020264 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:18.599220524Z", + "first_section_created": "2025-12-23T09:35:18.599512836Z", + "last_section_published": "2025-12-23T09:35:18.599657542Z", + "all_results_received": "2025-12-23T09:35:18.673179314Z", + "output_generated": "2025-12-23T09:35:18.673375822Z", + "total_processing_time_ms": 74, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 73, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:18.599512836Z", + "publish_time": "2025-12-23T09:35:18.599657542Z", + "first_worker_start": "2025-12-23T09:35:18.600205664Z", + "last_worker_end": "2025-12-23T09:35:18.672227Z", + "total_journey_time_ms": 72, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:18.600182563Z", + "start_time": "2025-12-23T09:35:18.600264567Z", + "end_time": "2025-12-23T09:35:18.60035397Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:18.600356Z", + "start_time": "2025-12-23T09:35:18.600499Z", + "end_time": "2025-12-23T09:35:18.672227Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 71 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:18.600130461Z", + "start_time": "2025-12-23T09:35:18.600205664Z", + "end_time": "2025-12-23T09:35:18.600318069Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + 
"wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:18.600150962Z", + "start_time": "2025-12-23T09:35:18.600213265Z", + "end_time": "2025-12-23T09:35:18.600271967Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 71, + "min_processing_ms": 71, + "max_processing_ms": 71, + "avg_processing_ms": 71, + "median_processing_ms": 71, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3663, + "slowest_section_id": 0, + "slowest_section_time_ms": 72 + } +} diff --git a/data/output/0059d0af6867bf2af6a1427a6d304da89ad23d1b.json b/data/output/0059d0af6867bf2af6a1427a6d304da89ad23d1b.json new file mode 100644 index 0000000..4edcd7c --- /dev/null +++ b/data/output/0059d0af6867bf2af6a1427a6d304da89ad23d1b.json @@ -0,0 +1,234 @@ +{ + "file_name": "0059d0af6867bf2af6a1427a6d304da89ad23d1b.txt", + "total_words": 270, + "top_n_words": [ + { + "word": "the", + "count": 13 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "men", + "count": 8 + }, + { 
+ "word": "at", + "count": 7 + }, + { + "word": "to", + "count": 7 + }, + { + "word": "two", + "count": 7 + }, + { + "word": "and", + "count": 6 + }, + { + "word": "injured", + "count": 6 + }, + { + "word": "police", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "com.", + "length": 4 + }, + { + "text": "au 24hrs a day.", + "length": 15 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Investigations are continuing.", + "length": 30 + }, + { + "text": "20pm on Tuesday night after reports two men had been seriously injured.", + "length": 71 + }, + { + "text": "20pm on Tuesday night after reports two men had been seriously injured .", + "length": 72 + }, + { + "text": "Police are currently questioning several other men in relation to the incident .", + "length": 80 + }, + { + "text": "Investigators found the body of a third man, aged 19, who had been killed nearby.", + "length": 81 + }, + { + "text": "Police believe the incident was not random and are currently questioning several other men .", + "length": 92 + }, + { + "text": "A stabbing at a service station has left a teenager dead and another two men seriously injured.", + "length": 95 + }, + { + "text": "Police are currently questioning three other men, aged in their 20s, in relation to the incident.", + "length": 97 + }, + { + "text": "Police were called to the petrol station on Benaud Street at Macgregor in south Brisbane at about 9.", + "length": 100 + }, + { + "text": "Anyone with information should contact Crime Stoppers anonymously via 1800 333 000 or crimestoppers.", + "length": 101 + }, + { + "text": "Police were called to the petrol station on Benaud Street at Macgregor, south of Brisbane, at about 9.", + "length": 102 + }, + { + "text": "The stabbing at the petrol station in Brisbane's south has left one man dead and another two seriously injured .", + "length": 112 + }, + { + "text": "Three crime scenes were established, one at the service station and two in 
Benaud Street, which included a car and a house.", + "length": 123 + }, + { + "text": "The two injured men, both aged 20, were taken to the Princess Alexandria Hospital where they remain in a serious condition.", + "length": 123 + }, + { + "text": "The two injured men, both aged 20, were taken to the Princess Alexandria Hospital where they remain in a serious condition .", + "length": 124 + }, + { + "text": "'Initial information suggests that this was not a random incident and it is believed that there is no wider risk to public safety,' Queensland Police said.", + "length": 155 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7525286674499512 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:19.100391985Z", + "first_section_created": "2025-12-23T09:35:19.100626995Z", + "last_section_published": "2025-12-23T09:35:19.100787401Z", + "all_results_received": "2025-12-23T09:35:19.162075279Z", + "output_generated": "2025-12-23T09:35:19.162194484Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:19.100626995Z", + "publish_time": "2025-12-23T09:35:19.100787401Z", + "first_worker_start": "2025-12-23T09:35:19.101285021Z", + "last_worker_end": "2025-12-23T09:35:19.161201Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:19.101356124Z", + "start_time": "2025-12-23T09:35:19.101402226Z", + "end_time": "2025-12-23T09:35:19.101428527Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:19.10158Z", + "start_time": "2025-12-23T09:35:19.101714Z", + "end_time": "2025-12-23T09:35:19.161201Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + 
"worker_type": "topn", + "receive_time": "2025-12-23T09:35:19.101268221Z", + "start_time": "2025-12-23T09:35:19.101324723Z", + "end_time": "2025-12-23T09:35:19.101368225Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:19.101227419Z", + "start_time": "2025-12-23T09:35:19.101285021Z", + "end_time": "2025-12-23T09:35:19.101300022Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1614, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/0059dd4d3cb092bb3ff6b5fe03e07e05adf79a3d.json b/data/output/0059dd4d3cb092bb3ff6b5fe03e07e05adf79a3d.json new file mode 100644 index 0000000..dae3d7b --- /dev/null +++ b/data/output/0059dd4d3cb092bb3ff6b5fe03e07e05adf79a3d.json @@ -0,0 +1,500 @@ +{ + "file_name": 
"0059dd4d3cb092bb3ff6b5fe03e07e05adf79a3d.txt", + "total_words": 1251, + "top_n_words": [ + { + "word": "the", + "count": 74 + }, + { + "word": "a", + "count": 38 + }, + { + "word": "and", + "count": 35 + }, + { + "word": "to", + "count": 30 + }, + { + "word": "of", + "count": 27 + }, + { + "word": "was", + "count": 27 + }, + { + "word": "in", + "count": 20 + }, + { + "word": "on", + "count": 18 + }, + { + "word": "said", + "count": 16 + }, + { + "word": "for", + "count": 15 + } + ], + "sorted_sentences": [ + { + "text": "Nothing.", + "length": 8 + }, + { + "text": "C Patel said.", + "length": 13 + }, + { + "text": ",' Mr Patel said.", + "length": 17 + }, + { + "text": "30 am on Monday .", + "length": 17 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "So what is the law here?", + "length": 24 + }, + { + "text": "Only we require justice', J.", + "length": 28 + }, + { + "text": "The parents have been informed.", + "length": 31 + }, + { + "text": "'What I am getting in the end of that?", + "length": 38 + }, + { + "text": "'RIP to Aneri, a great friend to many.", + "length": 38 + }, + { + "text": "She will also undergo mandatory testing.", + "length": 40 + }, + { + "text": "'She was a student at a nearby school...", + "length": 40 + }, + { + "text": "30 am, police told Daily Mail Australia on Monday.", + "length": 50 + }, + { + "text": "' A white car crashed into a chemist at around 11.", + "length": 50 + }, + { + "text": "She suffered critical injuries and died at the scene.", + "length": 53 + }, + { + "text": "'She was in the same year, she was shaken,' she said.", + "length": 53 + }, + { + "text": "For me it's a zero we don't have any more life for us.", + "length": 54 + }, + { + "text": "A 16-year-old school girl was confirmed dead at the scene .", + "length": 59 + }, + { + "text": "'Unexplainable, no words are there to explain the things...", + "length": 59 + }, + { + "text": "One mother in Kogarah said her daughter was upset by 
the news.", + "length": 62 + }, + { + "text": "we have just spoken to her parents,' a police spokesperson said.", + "length": 64 + }, + { + "text": "You will always be ­remembered in our hearts,' they also posted.", + "length": 65 + }, + { + "text": "'The school is providing counselling support to students and staff.", + "length": 67 + }, + { + "text": "'We are looking at speed as a factor,' the police spokesperson added.", + "length": 69 + }, + { + "text": "'They have only one child, they have lost everything, lost everything.", + "length": 71 + }, + { + "text": "and owner of the vehicle was treated in hospital for shock,' police said.", + "length": 73 + }, + { + "text": "The accident happened on the same spot as another fatal accident in 2007 .", + "length": 74 + }, + { + "text": "By SARAH DEAN, FREYA NOBLE AND DANIEL PIOTROWSKI FOR DAILY MAIL AUSTRALIA .", + "length": 75 + }, + { + "text": "A patient was trapped under the vehicle after it crashed into the pharmacy .", + "length": 76 + }, + { + "text": "'We want the justice so she can rest in peace wherever she is,' Mr Patel said.", + "length": 78 + }, + { + "text": "' Mr Tindale said in the message that counselling would be available to students.", + "length": 81 + }, + { + "text": "He also expressed his outrage over his daughter's alleged killer being unlicensed.", + "length": 82 + }, + { + "text": "Ambulance and fire brigade staff worked hard to free people trapped under the car .", + "length": 83 + }, + { + "text": "Parents of children at Kogarah High School were told of tragedy on Monday afternoon.", + "length": 84 + }, + { + "text": "Now her father, Nikul Patel, has made a desperate plea for justice, Nine News report.", + "length": 85 + }, + { + "text": "'One of our Year 11 students Aneri Patel has passed away as a result of an accident ...", + "length": 87 + }, + { + "text": "Ambulance Service of NSW said they have transported three people to St George Hospital .", + "length": 88 + }, + { + "text": "The 
female driver was taken to St George Hospital suffering shock after the crash at 11.", + "length": 88 + }, + { + "text": "God bless her family and provide them with strength and support through this tragic time.", + "length": 89 + }, + { + "text": "A teenager has been killed after a four wheel drive crashed into the chemist in Kogarah .", + "length": 89 + }, + { + "text": "Something has to be done very quickly otherwise it can happen any moment again' - he said.", + "length": 91 + }, + { + "text": "'I need to inform you of some sad news,' Deputy Principal Ross Tindale wrote in a text message.", + "length": 95 + }, + { + "text": "'I have asked the Director Of Public Prosecution to look closely at what has happened', he said.", + "length": 96 + }, + { + "text": "A friend of the family - Bill Saini - also expressed the grief Aneri's parents were experiencing.", + "length": 97 + }, + { + "text": "Products from the chemist including boxes of babies' nappies were strewn over the floor outside .", + "length": 97 + }, + { + "text": "The unlicensed 26-year-old woman was arrested on Monday night and was being questioned by police.", + "length": 98 + }, + { + "text": "Mr Patel said he and his wife (right) came from India to provide a better life for their daughter .", + "length": 99 + }, + { + "text": "The 26-year-old unlicensed driver was arrested on Monday night and is being interviewed by police .", + "length": 99 + }, + { + "text": "The female driver of the vehicle which struck Ms Patel was unlicensed and has been charged by police.", + "length": 101 + }, + { + "text": "Ambulances and a fire engine were at the scene on the junction of Montgomery Street and Railway Parade .", + "length": 104 + }, + { + "text": "On hearing the news that the driver had walked free on bail Aneri's father couldn't contain his disbelief.", + "length": 106 + }, + { + "text": "The fashion student sustained massive head and chest injuries and died while undergoing emergency surgery.", + "length": 
106 + }, + { + "text": "Earlier this week friends of the 16-year-old schoolgirl took to social media to pay tribute to the teenager.", + "length": 108 + }, + { + "text": "Classmates described the teenager as a peaceful, clever student and urged drivers to take care on the roads .", + "length": 109 + }, + { + "text": "Other friends and family of the victim took the opportunity to convey a serious message to those on the roads.", + "length": 110 + }, + { + "text": "'Please take this as a caution to drive safely as you could hold someone’s life in your hand,' one student wrote.", + "length": 115 + }, + { + "text": "Friends of the year 11 student flooded social media not only with memories of Ms Patel, but also with a plea to drivers.", + "length": 121 + }, + { + "text": "'My daughter had her licence and was walking on the footpath, and that lady she didn't have any licence and she was driving.", + "length": 124 + }, + { + "text": "He said he moved from his home country to offer his only child a better life, and now he and his wife are left with nothing.", + "length": 124 + }, + { + "text": "'The school community’s thoughts are with the student’s family and friends,' a NSW Department of Education spokesman said.", + "length": 126 + }, + { + "text": "Friends have paid tribute to Aneri Patel, 16, who was killed on Monday morning when a car smashed into a chemist in Sydney's south .", + "length": 132 + }, + { + "text": "Year 11 exams were postponed until Tuesday and Wednesday, however Thursday and Friday would be normal school days for the year group.", + "length": 133 + }, + { + "text": "The schoolgirl's grandfather said the family were devastated by the loss of Aneri and want justice for their young family member's death.", + "length": 137 + }, + { + "text": "Nikul Patel, the father of Aneri who was killed when she was hit by a car in Kogarah on Monday has broken his silence pleading for justice .", + "length": 140 + }, + { + "text": "Ms Patel, who studied at Kogarah 
High School, was hit while waiting for a bus and trapped under the vehicle in the chemist next to an ANZ bank.", + "length": 143 + }, + { + "text": "Aneri's grandfather (left) and friend of the family Bill Saini (right) also spoke of their grief and wishes for justice for the schoolgirl's death .", + "length": 148 + }, + { + "text": "The 26-year-old will face Kogarah Local Court on October 14 on charges including dangerous driving occasioning death and negligent driving occasioning death.", + "length": 157 + }, + { + "text": "Aneri Patel, 16, was walking along the main street in Kogarah in Sydney's south on Monday morning when she was struck by a 4WD driven by an unlicensed driver.", + "length": 158 + }, + { + "text": "One classmate simply wrote: 'Aneri, I hope you find peace where you are going,' while another remembered her as a peaceful and quiet girl,' The Daily Telegraph report.", + "length": 167 + }, + { + "text": "' Mr Patel believes the tragedy could have been avoided had the road been made safer after another woman, Emma Hansen, was killed at the same intersection seven years ago.", + "length": 172 + }, + { + "text": "The grieving father of a schoolgirl who was tragically killed when she was hit by a car has told of how he brought his family to Australia from India in order to give them a better life.", + "length": 186 + }, + { + "text": "The newspaper also reported Puipuiomaota Galuvao, the driver who allegedly killed Aneri, was only behind the wheel as her friend Heidi Watson had just had her license suspended at Kogarah Courthouse.", + "length": 199 + }, + { + "text": "Attorney General Bras Hazzard has asked for a review of the decision to grant the driver bail, and said it needs to be determined whether the decision to grant bail was 'appropriate', The Daily Telegraph report.", + "length": 213 + }, + { + "text": "Two children passengers, aged two and four, weren't injured but an 84-year-old man who was shopping in the chemist and a 51-year-old female 
employee were taken to St George Hospital with non-life threatening injuries.", + "length": 217 + }, + { + "text": "The accident happened in the same place that Emma Hansen, 20, was killed outside ANZ bank on the corner of Railway Parade and Montgomery Street when a learner-driver crashed into a queue of people waiting for a bus in 2007.", + "length": 223 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6997585892677307 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:19.601570646Z", + "first_section_created": "2025-12-23T09:35:19.601930361Z", + "last_section_published": "2025-12-23T09:35:19.602317376Z", + "all_results_received": "2025-12-23T09:35:19.682424015Z", + "output_generated": "2025-12-23T09:35:19.682638923Z", + "total_processing_time_ms": 81, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 80, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:19.601930361Z", + "publish_time": "2025-12-23T09:35:19.60217007Z", + "first_worker_start": "2025-12-23T09:35:19.602642789Z", + "last_worker_end": "2025-12-23T09:35:19.681676Z", + "total_journey_time_ms": 79, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:19.602771995Z", + "start_time": "2025-12-23T09:35:19.602847098Z", + "end_time": "2025-12-23T09:35:19.602952302Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:19.602868Z", + "start_time": "2025-12-23T09:35:19.603013Z", + "end_time": "2025-12-23T09:35:19.681676Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 78 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:19.602702492Z", + "start_time": "2025-12-23T09:35:19.602765694Z", + "end_time": "2025-12-23T09:35:19.602876999Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + 
"wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:19.602568086Z", + "start_time": "2025-12-23T09:35:19.602642789Z", + "end_time": "2025-12-23T09:35:19.602691291Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:19.602219872Z", + "publish_time": "2025-12-23T09:35:19.602317376Z", + "first_worker_start": "2025-12-23T09:35:19.602690991Z", + "last_worker_end": "2025-12-23T09:35:19.677588Z", + "total_journey_time_ms": 75, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:19.602808596Z", + "start_time": "2025-12-23T09:35:19.602859698Z", + "end_time": "2025-12-23T09:35:19.6029093Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:19.603122Z", + "start_time": "2025-12-23T09:35:19.603298Z", + "end_time": "2025-12-23T09:35:19.677588Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 74 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:19.602708292Z", + "start_time": "2025-12-23T09:35:19.602765694Z", + "end_time": "2025-12-23T09:35:19.602825797Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:19.60265379Z", + "start_time": "2025-12-23T09:35:19.602690991Z", + "end_time": "2025-12-23T09:35:19.602714992Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 152, + "min_processing_ms": 74, + 
"max_processing_ms": 78, + "avg_processing_ms": 76, + "median_processing_ms": 78, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3538, + "slowest_section_id": 0, + "slowest_section_time_ms": 79 + } +} diff --git a/data/output/005a250b7a98f9a5b26906b62279940b050cf8c8.json b/data/output/005a250b7a98f9a5b26906b62279940b050cf8c8.json new file mode 100644 index 0000000..4167932 --- /dev/null +++ b/data/output/005a250b7a98f9a5b26906b62279940b050cf8c8.json @@ -0,0 +1,246 @@ +{ + "file_name": "005a250b7a98f9a5b26906b62279940b050cf8c8.txt", + "total_words": 485, + "top_n_words": [ + { + "word": "the", + "count": 28 + }, + { + "word": "in", + "count": 20 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "liotta", + "count": 14 + }, + { + "word": "his", + "count": 13 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "as", + "count": 8 + }, + { + "word": "s", + "count": 8 + }, + { + "word": "of", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "\"The last thing I want to do is investigate.", + "length": 44 + }, + { + "text": "\" CNN's David Daniel contributed to this story.", + "length": 47 + }, + { + "text": "See some of the highlights of Liotta's career » .", + "length": 50 + }, + { + "text": "He's also earned plaudits for his television work.", + "length": 50 + }, + { + "text": "Liotta soon proved that acting was not his only forte.", + 
"length": 54 + }, + { + "text": "\" With several films currently in production, Liotta shows no signs of stopping.", + "length": 80 + }, + { + "text": "\" Ray Liotta (right) co-stars with Seth Rogen in \"Observe and Report,\" which opened Friday.", + "length": 91 + }, + { + "text": "The actor got his start on daytime TV, playing the character Joey Perrini on the soap \"Another World.", + "length": 101 + }, + { + "text": "\" The role propelled Liotta to fame and earned him a Golden Globe nomination for best supporting actor.", + "length": 103 + }, + { + "text": "In 2004, Liotta starred in an episode of the hit NBC drama, \"ER,\" winning an Emmy for his guest appearance.", + "length": 107 + }, + { + "text": "The film was one of that year's surprise hits, taking in more than $150 million at the domestic box office.", + "length": 107 + }, + { + "text": "\"Edgy guys stand out in people's minds,\" Liotta said of his famous \"bad guy\" roles, according to the Dispatch.", + "length": 110 + }, + { + "text": "\" The film, which opened in theaters Friday and has earned rave reviews by critics, is not Liotta's first comedic undertaking.", + "length": 126 + }, + { + "text": "\"A flasher keeps flashing people at the mall, so they call in the 'real police,' which is me,\" Liotta told the Columbus Dispatch.", + "length": 129 + }, + { + "text": "Liotta first made his mark on the film industry by playing a psychotic ex-husband determined to win back his ex-wife in \"Something Wild.", + "length": 136 + }, + { + "text": "He formed his own production company in 2002 and made his debut as a producer on the film \"Narc,\" in which he also starred as a corrupt cop.", + "length": 140 + }, + { + "text": "The actor told the Dispatch that he hopes to try his hand at romance in the future, joking that he'd like to \"kiss the girl without having to choke her first.", + "length": 158 + }, + { + "text": "From there, Liotta starred as mobster Henry Hill in the Martin Scorsese classic 
\"Goodfellas\" (1990), working alongside renowned actors Robert De Niro and Joe Pesci.", + "length": 164 + }, + { + "text": "The 54-year-old actor also starred in 2007's \"Wild Hogs,\" a comedy co-starring Tim Allen and John Travolta about a group of middle-aged suburban men who decide to become bikers.", + "length": 177 + }, + { + "text": "But, to avoid being typecast as the \"bad guy\" forever, Liotta decided to break from the mold in his next role as a caring father in the heartwarming film \"Corrina, Corrina\" (1994), co-starring Whoopi Goldberg.", + "length": 209 + }, + { + "text": "(CNN) -- An ex-convict, a mobster, a serial killer -- after more than two decades in the movie business, Ray Liotta is still perhaps best known for these \"bad guy\" roles in such films as \"Something Wild\" and \"GoodFellas.", + "length": 220 + }, + { + "text": "But in his most recent film, \"Observe and Report\" -- a dark comedy co-starring Seth Rogen as a bipolar mall security guard and Anna Faris as the vapid make-up counter clerk he's in love with -- Liotta inches away from his edgy persona to play a detective investigating a flashing incident at the mall.", + "length": 301 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.507175087928772 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:20.103243727Z", + "first_section_created": "2025-12-23T09:35:20.103583241Z", + "last_section_published": "2025-12-23T09:35:20.103742947Z", + "all_results_received": "2025-12-23T09:35:20.174350402Z", + "output_generated": "2025-12-23T09:35:20.174498208Z", + "total_processing_time_ms": 71, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 70, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:20.103583241Z", + "publish_time": "2025-12-23T09:35:20.103742947Z", + "first_worker_start": "2025-12-23T09:35:20.104132063Z", + "last_worker_end": 
"2025-12-23T09:35:20.173446Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:20.104259968Z", + "start_time": "2025-12-23T09:35:20.104326371Z", + "end_time": "2025-12-23T09:35:20.104379773Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:20.104523Z", + "start_time": "2025-12-23T09:35:20.104669Z", + "end_time": "2025-12-23T09:35:20.173446Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 68 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:20.104199066Z", + "start_time": "2025-12-23T09:35:20.104266668Z", + "end_time": "2025-12-23T09:35:20.104323971Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:20.104075861Z", + "start_time": "2025-12-23T09:35:20.104132063Z", + "end_time": "2025-12-23T09:35:20.104162064Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 68, + "min_processing_ms": 68, + "max_processing_ms": 68, + "avg_processing_ms": 68, + "median_processing_ms": 68, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2721, + "slowest_section_id": 0, + "slowest_section_time_ms": 69 + } +} diff --git a/data/output/005a56d12cf2c1335c83e387ded82fc61eac780e.json b/data/output/005a56d12cf2c1335c83e387ded82fc61eac780e.json new file mode 100644 index 0000000..0e44f6c --- /dev/null +++ b/data/output/005a56d12cf2c1335c83e387ded82fc61eac780e.json @@ -0,0 +1,588 @@ +{ + "file_name": "005a56d12cf2c1335c83e387ded82fc61eac780e.txt", + "total_words": 1349, + "top_n_words": [ + { + "word": "the", + "count": 110 + }, + { + "word": "of", + "count": 48 + }, + { + "word": "to", + "count": 44 + }, + { + "word": "and", + "count": 36 + }, + { + "word": "in", + "count": 31 + }, + { + "word": "a", + "count": 23 + }, + { + "word": "was", + "count": 20 + }, + { + "word": "for", + "count": 18 + }, + { + "word": "is", + "count": 17 + }, + { + "word": "by", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "e.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "The U.", + "length": 6 + }, + { + "text": "Getting U.", + "length": 10 + }, + { + "text": "But the U.", + "length": 10 + }, + { + "text": "Then if a U.", + "length": 12 + }, + { + "text": "So George W.", + "length": 12 + }, + { + "text": "In 1945 the U.", + "length": 14 + }, + { + 
"text": "It is for the U.", + "length": 16 + }, + { + "text": "Security Council.", + "length": 17 + }, + { + "text": "President George W.", + "length": 19 + }, + { + "text": "I believe George W.", + "length": 19 + }, + { + "text": "weapons inspectors.", + "length": 19 + }, + { + "text": "Nothing has happened.", + "length": 21 + }, + { + "text": "Kofi Annan, then the U.", + "length": 23 + }, + { + "text": "There was no going back.", + "length": 24 + }, + { + "text": "This was not a new vision.", + "length": 26 + }, + { + "text": "I am not alone in these views.", + "length": 30 + }, + { + "text": "Charter was ratified by the U.", + "length": 30 + }, + { + "text": "It is intolerable and inexcusable.", + "length": 34 + }, + { + "text": "resolutions concerning disarmament.", + "length": 35 + }, + { + "text": "INTERACTIVE: How has the war changed you?", + "length": 41 + }, + { + "text": "FULL COVERAGE: The Iraq War, 10 years on .", + "length": 42 + }, + { + "text": "OPINION: Why Iraq War was fought for oil .", + "length": 42 + }, + { + "text": "Bush and British Prime Minister Tony Blair.", + "length": 43 + }, + { + "text": "Until this is redressed, la lotta continua!", + "length": 43 + }, + { + "text": "The UK, to their credit, signed up to the court.", + "length": 48 + }, + { + "text": "ARWA DAMON: Iraq suffocates in cloak of sorrow .", + "length": 48 + }, + { + "text": "Troops had already been committed on the ground.", + "length": 48 + }, + { + "text": "EXCLUSIVE: Hans Blix on 'terrible mistake' in Iraq .", + "length": 52 + }, + { + "text": "Article 1 makes clear that the main purpose of the U.", + "length": 53 + }, + { + "text": "getting in first) on the basis of a perceived future threat.", + "length": 60 + }, + { + "text": "But this is not what happened 10 years ago at the behest of U.", + "length": 62 + }, + { + "text": "Unfortunately only the first three have been brought into effect.", + "length": 65 + }, + { + "text": "The embodiment of the rule of law 
internationally has been the U.", + "length": 65 + }, + { + "text": "is a permanent Council member and can veto any potential referral.", + "length": 66 + }, + { + "text": "did not, lest its leaders end up accused of crimes before the court.", + "length": 68 + }, + { + "text": "But regime change, however desirable, is not permitted by the Charter.", + "length": 70 + }, + { + "text": "Bush and Tony Blair should be tried for war crimes as defined by international law.", + "length": 83 + }, + { + "text": "Without it our lives would be subject to a free for all in which might becomes right.", + "length": 85 + }, + { + "text": ", the UK, and the majority of the 50 states who had originally agreed to this framework.", + "length": 88 + }, + { + "text": "Whilst the act of aggression cannot be prosecuted, war crimes committed thereafter can be.", + "length": 90 + }, + { + "text": "So far two years have gone by while the government has obstructed disclosure and publication.", + "length": 93 + }, + { + "text": "perpetrator of war crimes travelled into that country's jurisdiction, they could be arrested.", + "length": 93 + }, + { + "text": "The only way around this predicament was for the Bush-Blair axis to fabricate a case of threat.", + "length": 95 + }, + { + "text": "Bush can safely plan a visit for tea with Tony Blair in London without fear of prosecution in the UK.", + "length": 101 + }, + { + "text": "Their agenda was quite different -- to remove a dictator, Saddam Hussein, whose regime was abhorrent.", + "length": 101 + }, + { + "text": "The Council made clear they continued to be in charge but had not authorised the use of force in Iraq.", + "length": 102 + }, + { + "text": "At the time it became a debate about whether Iraq satisfied these criteria by its failure to abide by U.", + "length": 104 + }, + { + "text": "Without accountability for Western states, how can we expect the rest of world to respect these principles?", + "length": 107 + }, + { + "text": "never 
wanted this inquiry but was forced by the power of the victims' families and public opinion to accede.", + "length": 108 + }, + { + "text": "Inspectors had found no evidence of WMD in the lead-up to the war and never did, but were ordered to go home.", + "length": 109 + }, + { + "text": "Four transgressions were agreed -- war crimes, crimes against humanity, genocide, and the crime of aggression.", + "length": 110 + }, + { + "text": "This was why Bush and Blair were not prepared to allow the weapons inspectors, who were in Iraq, any more time.", + "length": 111 + }, + { + "text": "There is a substantial consensus of international legal opinion which recognises the illegality of the invasion.", + "length": 112 + }, + { + "text": "In 1998 the International Criminal Court was established to deal with individuals who commit international crimes.", + "length": 114 + }, + { + "text": "The Charter is not gobbledygook -- it is full of common sense, and it should be obligatory reading in every school.", + "length": 115 + }, + { + "text": "We were all strong advocates of the notion that the rule of law was the bedrock of any civilised and democratic society.", + "length": 120 + }, + { + "text": "Bush and Blair realised they would never get one, and so they prepared to go it alone with a cobbled together coalition.", + "length": 120 + }, + { + "text": "Thrashed out by experts and with massive support behind it, the document was no maverick, outlandish or oddball agreement.", + "length": 122 + }, + { + "text": "to determine what collective measures should be taken -- not for individual states to take unilateral or bilateral action.", + "length": 122 + }, + { + "text": "Alternatively individual member states could incorporate these crimes of universal jurisdiction into their own domestic law.", + "length": 124 + }, + { + "text": "The principal Security Council resolution 1441, adopted in November 2002, called on Iraq to disarm its WMD and cooperate with U.", + "length": 128 + 
}, + { + "text": "S leaders hauled before the court is even more problematic -- the Security Council could refer Americans to the court, but the U.", + "length": 129 + }, + { + "text": "In the UK we are still waiting for the results of a public inquiry into the circumstances in which the decision to go to war was taken.", + "length": 135 + }, + { + "text": "Secretary General, told the BBC in 2004 that the Charter had been breached and that the invasion was not sanctioned by the Security Council.", + "length": 140 + }, + { + "text": "The whole episode regarding the Iraq War is a tawdry tale that has subverted the rule of law and tarnished the reputation of international law.", + "length": 143 + }, + { + "text": "This argument, which was false, became the main basis for invasion because the only other route to war had been closed off by international law.", + "length": 144 + }, + { + "text": "has the power to authorise military intervention once all other options have been exhausted and the peace and stability of a region is in jeopardy.", + "length": 147 + }, + { + "text": "No one wanted a repeat of such flagrant aggression, so the Charter was drawn up to replace gunboat diplomacy with peaceful measures overseen by the U.", + "length": 150 + }, + { + "text": "Everyone recognised there might have to be exceptions to this rule, but the Charter specifically does not authorize preemptive nor preventative action(i.", + "length": 153 + }, + { + "text": "This is not rocket science, but the simple application of restraint and respect for the rules that Britain and America agreed to when they signed the Charter.", + "length": 158 + }, + { + "text": "Charter and the Universal Declaration of Human Rights -- direct results of the devastation inflicted by the Nazi regime in Germany during the Second World War.", + "length": 159 + }, + { + "text": "If it were, the powerful nations could go round the world picking off the weak -- or more particularly the states thought to be 
hostile to their own ambitions.", + "length": 159 + }, + { + "text": "Ten years ago I was one of a small number of UK lawyers who opposed the invasion of Iraq on the grounds that it was illegal and unauthorised by the United Nations.", + "length": 163 + }, + { + "text": "As a result, a legal consortium of which I was a part, and other groups in Europe, petitioned the ICC for action against UK politicians over their involvement in the war.", + "length": 170 + }, + { + "text": "The use of cluster bombs and depleted uranium in Iraq by coalition forces (euphemistically called collateral damage) upon vulnerable civilians falls within this definition.", + "length": 172 + }, + { + "text": "Tony Blair insisted to the British public that he would only support a war if a second Security Council resolution authorising the action was passed, but the resolution never came.", + "length": 180 + }, + { + "text": "It is time for Bush and Blair to be thoroughly, independently and judicially investigated for the crimes I suggest have been committed and it is time for the crime of aggression to come into force.", + "length": 197 + }, + { + "text": "is to \"maintain international peace and security and to that end to take effective collective measures for the prevention and removal of threats to the peace\" and to act in accordance with justice and the principles of international law.", + "length": 237 + }, + { + "text": "This they did by the knowing manipulation of flawed intelligence about the existence of weapons of mass destruction in Iraq (which were never found), and the bogus claim that Saddam Hussein could deploy such WMD within a 45-minute window.", + "length": 238 + }, + { + "text": "So for example to launch an attack, like the invasion of Iraq, with the knowledge that its effect is likely to cause incidental death or injury to civilians or the natural environment (Article 8) will render the perpetrator liable to prosecution.", + "length": 246 + }, + { + "text": "In case some 
politicians found it difficult to understand all this, Article 2(4) spelled it out in unequivocal terms: \"All Members shall refrain in their international relations from the threat or use of force against the territorial integrity or political independence of any state\".", + "length": 284 + }, + { + "text": "The UK has such a provision, but when put to the test by UK citizens seeking arrest warrants in relation to the planned visits of Israeli political and military leaders -- who were potentially responsible for war crimes in Gaza -- the UK government reprehensibly placed impediments in the way of its future use.", + "length": 311 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7393640577793121 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:20.604522692Z", + "first_section_created": "2025-12-23T09:35:20.604887407Z", + "last_section_published": "2025-12-23T09:35:20.60522292Z", + "all_results_received": "2025-12-23T09:35:20.688356881Z", + "output_generated": "2025-12-23T09:35:20.688646593Z", + "total_processing_time_ms": 84, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 83, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:20.604887407Z", + "publish_time": "2025-12-23T09:35:20.605127317Z", + "first_worker_start": "2025-12-23T09:35:20.605634037Z", + "last_worker_end": "2025-12-23T09:35:20.687696Z", + "total_journey_time_ms": 82, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:20.605772543Z", + "start_time": "2025-12-23T09:35:20.605835745Z", + "end_time": "2025-12-23T09:35:20.60595665Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:20.605989Z", + "start_time": "2025-12-23T09:35:20.606122Z", + "end_time": "2025-12-23T09:35:20.687696Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 81 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:20.605678339Z", + "start_time": "2025-12-23T09:35:20.605764442Z", + "end_time": "2025-12-23T09:35:20.605858346Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:20.605570234Z", + "start_time": "2025-12-23T09:35:20.605634037Z", + "end_time": "2025-12-23T09:35:20.605676039Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:20.605172418Z", + "publish_time": "2025-12-23T09:35:20.60522292Z", + "first_worker_start": "2025-12-23T09:35:20.605757542Z", + "last_worker_end": "2025-12-23T09:35:20.684861Z", + "total_journey_time_ms": 79, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:20.605777043Z", + "start_time": "2025-12-23T09:35:20.605830945Z", + "end_time": "2025-12-23T09:35:20.605899448Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:20.606337Z", + "start_time": "2025-12-23T09:35:20.606456Z", + "end_time": "2025-12-23T09:35:20.684861Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 78 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:20.605721441Z", + "start_time": "2025-12-23T09:35:20.605757542Z", + "end_time": "2025-12-23T09:35:20.605824845Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:20.605847946Z", + "start_time": "2025-12-23T09:35:20.605907648Z", + "end_time": "2025-12-23T09:35:20.60594395Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + 
"max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 159, + "min_processing_ms": 78, + "max_processing_ms": 81, + "avg_processing_ms": 79, + "median_processing_ms": 81, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3940, + "slowest_section_id": 0, + "slowest_section_time_ms": 82 + } +} diff --git a/data/output/005acf7ab98656d8a9df9cc305cf0de84545b35a.json b/data/output/005acf7ab98656d8a9df9cc305cf0de84545b35a.json new file mode 100644 index 0000000..aee8817 --- /dev/null +++ b/data/output/005acf7ab98656d8a9df9cc305cf0de84545b35a.json @@ -0,0 +1,270 @@ +{ + "file_name": "005acf7ab98656d8a9df9cc305cf0de84545b35a.txt", + "total_words": 420, + "top_n_words": [ + { + "word": "the", + "count": 28 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "on", + "count": 10 + }, + { + "word": "said", + "count": 10 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "strike", + "count": 9 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "were", + "count": 7 + }, + { + "word": "for", + "count": 6 + }, + { + "word": "tube", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "m.", + "length": 2 + }, + { + "text": "m.", + "length": 2 + }, + { + "text": "(2 p.", + "length": 5 + }, + { + "text": "5 miles 
away.", + "length": 13 + }, + { + "text": "The strike began Tuesday at 7 p.", + "length": 32 + }, + { + "text": "Others hit the pavement and walked.", + "length": 35 + }, + { + "text": "\"The Northern line is running perfectly.", + "length": 40 + }, + { + "text": "\"It's OK -- quite refreshing,\" said a man on Regent Street.", + "length": 59 + }, + { + "text": "and resolve this issue without any more disruption to Londoners.", + "length": 64 + }, + { + "text": "Commuters queue for packed buses in London on Wednesday morning.", + "length": 64 + }, + { + "text": "\"The fact is that Tube workers have been driven into walking out today.", + "length": 71 + }, + { + "text": "\"It's been really good,\" a girl on Oxford Street told CNN about her commute.", + "length": 76 + }, + { + "text": "ET) but the first full effects were felt during Wednesday's morning commute.", + "length": 76 + }, + { + "text": "\"The RMT leadership says we were close to a deal,\" Hardy said in a statement.", + "length": 77 + }, + { + "text": "\"If that is the case, then they should call off the strike, return to talks ...", + "length": 79 + }, + { + "text": "The RMT represents about half of the 20,000 employees on the Tube, a TfL spokeswoman said.", + "length": 90 + }, + { + "text": "Other unions including Unite and TSSA represent the rest, she said, and were not on strike.", + "length": 91 + }, + { + "text": "\" TfL was running extra buses and free shuttle services across the River Thames during the strike.", + "length": 98 + }, + { + "text": "\"RMT doesn't resort to industrial action lightly,\" General Secretary Bob Crow said in a statement.", + "length": 98 + }, + { + "text": "He said he had just walked from Liverpool Street Station, a train station as well as a Tube stop that is more than 2.", + "length": 117 + }, + { + "text": "The RMT trade union called the strike after talks with management over pay, job cuts, and disciplinary issues broke down.", + "length": 121 + }, + { + "text": "\" 
Still, some bus services were packed with commuters who normally ride the underground trains or who failed to find a taxi.", + "length": 124 + }, + { + "text": "Electronic travel cards used for the TfL network were temporarily being allowed on all train lines in greater London, it said.", + "length": 126 + }, + { + "text": "\" Transport Commissioner Peter Hardy said the talks had been making progress on all issues and he urged the RMT to return to the table.", + "length": 135 + }, + { + "text": "\"I think we'd all like to strike for more money, but unfortunately we can't,\" said one woman at Oxford Circus, where the Tube is closed.", + "length": 136 + }, + { + "text": "The strike was set to last for 48 hours with a normal service resuming Friday morning, according to Transport for London (TfL), which runs the city's transportation network.", + "length": 173 + }, + { + "text": "While most services on the Tube were shut because of the strike, one line -- the Northern line -- was running normally and five others were running on a reduced schedule, TfL said.", + "length": 180 + }, + { + "text": "LONDON, England (CNN) -- London commuters crammed onto buses, scrambled for taxis, cycled or simply walked on Wednesday as a strike by Tube workers shut down most of the subway network.", + "length": 185 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.6420465707778931 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:21.106260176Z", + "first_section_created": "2025-12-23T09:35:21.107988246Z", + "last_section_published": "2025-12-23T09:35:21.108127051Z", + "all_results_received": "2025-12-23T09:35:21.173123279Z", + "output_generated": "2025-12-23T09:35:21.173273185Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:21.107988246Z", + "publish_time": 
"2025-12-23T09:35:21.108127051Z", + "first_worker_start": "2025-12-23T09:35:21.108769377Z", + "last_worker_end": "2025-12-23T09:35:21.172136Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:21.108782478Z", + "start_time": "2025-12-23T09:35:21.108856081Z", + "end_time": "2025-12-23T09:35:21.108915083Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:21.108941Z", + "start_time": "2025-12-23T09:35:21.10909Z", + "end_time": "2025-12-23T09:35:21.172136Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:21.108693174Z", + "start_time": "2025-12-23T09:35:21.108769377Z", + "end_time": "2025-12-23T09:35:21.108823979Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:21.108710775Z", + "start_time": "2025-12-23T09:35:21.108801678Z", + "end_time": "2025-12-23T09:35:21.10883828Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2390, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/005b8761aacb2354318700e781da8e3a70796617.json b/data/output/005b8761aacb2354318700e781da8e3a70796617.json new file mode 100644 index 0000000..e4784b9 --- /dev/null +++ b/data/output/005b8761aacb2354318700e781da8e3a70796617.json @@ -0,0 +1,254 @@ +{ + "file_name": "005b8761aacb2354318700e781da8e3a70796617.txt", + "total_words": 411, + "top_n_words": [ + { + "word": "the", + "count": 28 + }, + { + "word": "sudan", + "count": 16 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "south", + "count": 13 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "s", + "count": 7 + }, + { + "word": "on", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "Those negotiations were delayed last week.", + "length": 42 + }, + { + "text": "Talks are expected to pick up again on Tuesday.", + "length": 47 + }, + { + "text": "Despite the split, al-Bashir has a stake in the talks.", + "length": 54 + }, + { + "text": "As teams from both sides are negotiating, fighting rages.", + "length": 57 + }, + { + "text": "Heed the warnings: Genocide and Rwanda's lessons for South Sudan .", + "length": 66 + }, + { + "text": "CNN's Samira Said and Nana Karikari-apau contributed to this report.", + "length": 68 + }, + { + "text": "\"Not a single day can be lost in the search for peace in South Sudan.", + "length": 69 + }, + { + "text": "Violence quickly spread with reports of mass killings emerging nationwide.", + "length": 74 + }, + { 
+ "text": "Though South Sudan and Sudan divorced, they still have unresolved oil issues.", + "length": 77 + }, + { + "text": "Prolonged fighting has cut South Sudan's oil output, affecting both economies.", + "length": 78 + }, + { + "text": "Since then, militia members loyal to the ousted leader have battled government forces.", + "length": 86 + }, + { + "text": "South Sudan seceded from Sudan in 2011 after decades of war, making it the world's youngest nation.", + "length": 99 + }, + { + "text": "Meanwhile, talks between South Sudan's government and rebels began Monday in Addis Ababa, Ethiopia.", + "length": 99 + }, + { + "text": "Three weeks of fighting have left more than 1,000 people dead and forced 200,000 from their homes, officials say.", + "length": 113 + }, + { + "text": "The negotiations \"come not a moment too soon,\" African Union Chairwoman Nkosazana Dlamini-Zuma said in a statement.", + "length": 115 + }, + { + "text": "Al-Bashir's visit comes as rival parties in the South Sudan power struggle work to find a solution to the violence.", + "length": 115 + }, + { + "text": "They reminded themselves of the long-drawn liberation struggle that culminated in the independence of their country.", + "length": 116 + }, + { + "text": "\"The two delegations appreciated the gravity of the situation and the need and urgency of resolving the crisis in South Sudan.", + "length": 126 + }, + { + "text": "\" South Sudan erupted in violence on December 15 when rebels loyal to ousted Vice President Riek Machar tried to stage a coup.", + "length": 126 + }, + { + "text": "The two later held a joint news conference with al-Bashir stressing readiness to support South Sudan, according to the official Sudan News Agency.", + "length": 146 + }, + { + "text": "He flew into the airport in the capital of Juba before heading to the presidential palace to meet his South Sudan counterpart, President Salva Kiir.", + "length": 148 + }, + { + "text": "(CNN) -- Sudanese President Omar 
al-Bashir arrived in neighboring South Sudan on Monday for talks on unrest in the latter nation that has left hundreds dead.", + "length": 157 + }, + { + "text": "Stopping the fighting in South Sudan is not only a humanitarian imperative but also a strategic necessity, in order to halt the rapid descent of Africa's newest nation into collapse.", + "length": 182 + }, + { + "text": "They regretted the unfortunate situation which the current conflict has brought,\" read a statement from the Intergovernmental Authority on Development, an East African trade bloc helping to mediate between the parties.", + "length": 218 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.44775205850601196 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:21.608904496Z", + "first_section_created": "2025-12-23T09:35:21.610285352Z", + "last_section_published": "2025-12-23T09:35:21.610464459Z", + "all_results_received": "2025-12-23T09:35:21.671708535Z", + "output_generated": "2025-12-23T09:35:21.67184604Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:21.610285352Z", + "publish_time": "2025-12-23T09:35:21.610464459Z", + "first_worker_start": "2025-12-23T09:35:21.61099198Z", + "last_worker_end": "2025-12-23T09:35:21.670867Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:21.611014281Z", + "start_time": "2025-12-23T09:35:21.611080084Z", + "end_time": "2025-12-23T09:35:21.611133086Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:21.611242Z", + "start_time": "2025-12-23T09:35:21.611389Z", + "end_time": "2025-12-23T09:35:21.670867Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:21.610942278Z", + "start_time": "2025-12-23T09:35:21.61099198Z", + "end_time": "2025-12-23T09:35:21.611052883Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:21.610950579Z", + "start_time": "2025-12-23T09:35:21.611014481Z", + "end_time": "2025-12-23T09:35:21.611036582Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2500, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/005bbd11c80c5be553f0a1c6b1928bc9ada53dc4.json b/data/output/005bbd11c80c5be553f0a1c6b1928bc9ada53dc4.json new file mode 100644 index 0000000..17dffe5 --- /dev/null +++ b/data/output/005bbd11c80c5be553f0a1c6b1928bc9ada53dc4.json @@ -0,0 
+1,434 @@ +{ + "file_name": "005bbd11c80c5be553f0a1c6b1928bc9ada53dc4.txt", + "total_words": 573, + "top_n_words": [ + { + "word": "to", + "count": 21 + }, + { + "word": "the", + "count": 18 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "mr", + "count": 14 + }, + { + "word": "blair", + "count": 12 + }, + { + "word": "mugabe", + "count": 12 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "we", + "count": 10 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "he", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "A .", + "length": 3 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "But .", + "length": 5 + }, + { + "text": "Lord .", + "length": 6 + }, + { + "text": "‘We .", + "length": 7 + }, + { + "text": "’ He .", + "length": 8 + }, + { + "text": "‘Why .", + "length": 8 + }, + { + "text": "’ The .", + "length": 9 + }, + { + "text": "collapse.", + "length": 9 + }, + { + "text": "However .", + "length": 9 + }, + { + "text": "Zimbabwe?", + "length": 9 + }, + { + "text": "‘There .", + "length": 10 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "’ Violent .", + "length": 13 + }, + { + "text": "Tamara Cohen .", + "length": 14 + }, + { + "text": "He added: ‘A .", + "length": 16 + }, + { + "text": "While Mr Blair .", + "length": 16 + }, + { + "text": "Mugabe must go”.", + "length": 18 + }, + { + "text": "It later collapsed.", + "length": 19 + }, + { + "text": "different positions.", + "length": 20 + }, + { + "text": "how to handle Mugabe.", + "length": 21 + }, + { + "text": "This was the difference.", + "length": 24 + }, + { + "text": "18:30 EST, 27 November 2013 .", + "length": 29 + }, + { + "text": "12:52 EST, 27 November 2013 .", + "length": 29 + }, + { + "text": "part in any military intervention.", + "length": 34 + }, + { + "text": "look at’ toppling Mr Mugabe by force.", + "length": 39 + }, + { + "text": 
"During his presidency from 1999 until .", + "length": 39 + }, + { + "text": "Pressure: Tony Blair and Thabo Mbeki in 2006.", + "length": 45 + }, + { + "text": "plan so we can physically remove Robert Mugabe”.", + "length": 50 + }, + { + "text": "yesterday to corroborate his claim against Mr Blair.", + "length": 52 + }, + { + "text": "He added: ‘The problem was, we were speaking from .", + "length": 53 + }, + { + "text": "him and we are going to put someone else in his place?", + "length": 54 + }, + { + "text": "problems – the best way to solve them is regime change.", + "length": 57 + }, + { + "text": "and suggested he did not consider it a serious proposition.", + "length": 59 + }, + { + "text": "to the point of using military force, and we said “No”.", + "length": 59 + }, + { + "text": "So we said, “No, let Zimbabweans sit down, let them talk”.", + "length": 62 + }, + { + "text": "Mr Mugabe's 33-year rule has included a crackdown on opponents .", + "length": 64 + }, + { + "text": "Mr Mbeki has attacked Mr Blair's interventionist foreign policy .", + "length": 65 + }, + { + "text": "Britain of plotting to overthrow him and ‘recolonise’ Zimbabwe.", + "length": 67 + }, + { + "text": "retired chief of the British armed forces said he had to withstand .", + "length": 68 + }, + { + "text": "‘But we said, “Mugabe is part of the solution to this problem”.", + "length": 69 + }, + { + "text": "South African government helped to broker a power-sharing agreement .", + "length": 69 + }, + { + "text": "knew that, because we had come under the same pressure, and that we .", + "length": 69 + }, + { + "text": "were other people saying, “There are political problems, economic .", + "length": 69 + }, + { + "text": "condemned the regime and backed tougher sanctions, Mr Mugabe accused .", + "length": 70 + }, + { + "text": "claimed the idea was rejected because Britain had no right to decide .", + "length": 70 + }, + { + "text": "2008 he said South Africa and Britain were in stark 
disagreement over .", + "length": 71 + }, + { + "text": "land reforms in 2000 and 2001 saw some 4,000 white Zimbabweans forced .", + "length": 71 + }, + { + "text": "tougher stance against him, but he never asked anyone to plan or take .", + "length": 71 + }, + { + "text": "Mr Mbeki cited Lord Guthrie’s comments to the Al Jazeera TV channel .", + "length": 71 + }, + { + "text": "spokesman for Mr Blair said: ‘Tony Blair has long believed Zimbabwe .", + "length": 71 + }, + { + "text": "Guthrie, who was Chief of the Defence Staff during Mr Blair’s first .", + "length": 71 + }, + { + "text": "would be much better off without Robert Mugabe and always argued for a .", + "length": 72 + }, + { + "text": "term, disclosed in 2007 that ‘people were always trying to get me to .", + "length": 72 + }, + { + "text": "from their farms, while hyperinflation took the economy to the brink of .", + "length": 73 + }, + { + "text": "does it become British responsibility to decide who leads the people of .", + "length": 73 + }, + { + "text": "he did not say these requests had come from the Prime Minister himself, .", + "length": 73 + }, + { + "text": "who leads African countries, adding: ‘You are coming from London, you .", + "length": 73 + }, + { + "text": "pressure from Tony Blair, who was saying, “You must work out a military .", + "length": 75 + }, + { + "text": "don’t like Robert Mugabe for whatever reason and we are going to remove .", + "length": 75 + }, + { + "text": "need to cooperate in some scheme – it was a regime change scheme – even .", + "length": 77 + }, + { + "text": "between Zanu-PF and the opposition Movement for Democratic Change (MDC) in 2008.", + "length": 80 + }, + { + "text": "Former South African president Thabo Mbeki said Mr Blair asked for his help in 2000.", + "length": 84 + }, + { + "text": "Allegation: Mr Mbeki has claimed that 13 years ago Tony Blair asked him for help in 'invading' Zimbabwe .", + "length": 105 + }, + { + "text": "Tony Blair planned to 
invade Zimbabwe with South African support and oust Robert Mugabe after the country descended into chaos, it was claimed yesterday.", + "length": 153 + }, + { + "text": "Talks: Tony Blair, pictured left with former South African president Thabo Mbeki, has strongly denied plotting military action against Robert Mugabe, right.", + "length": 156 + }, + { + "text": "But Mr Mbeki favoured a negotiated settlement and, despite pressure from Britain to join military action to depose Mr Mugabe and his Zanu-PF party, he refused.", + "length": 159 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.658765971660614 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:22.110073874Z", + "first_section_created": "2025-12-23T09:35:22.112131157Z", + "last_section_published": "2025-12-23T09:35:22.112295063Z", + "all_results_received": "2025-12-23T09:35:22.178795252Z", + "output_generated": "2025-12-23T09:35:22.178941858Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:22.112131157Z", + "publish_time": "2025-12-23T09:35:22.112295063Z", + "first_worker_start": "2025-12-23T09:35:22.112788883Z", + "last_worker_end": "2025-12-23T09:35:22.17786Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:22.112766283Z", + "start_time": "2025-12-23T09:35:22.112832185Z", + "end_time": "2025-12-23T09:35:22.112923989Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:22.112986Z", + "start_time": "2025-12-23T09:35:22.113143Z", + "end_time": "2025-12-23T09:35:22.17786Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": 
"2025-12-23T09:35:22.112739581Z", + "start_time": "2025-12-23T09:35:22.112788883Z", + "end_time": "2025-12-23T09:35:22.112885587Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:22.112806484Z", + "start_time": "2025-12-23T09:35:22.112879487Z", + "end_time": "2025-12-23T09:35:22.112923589Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3508, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/005bc076b52c401a579f1da45b0e5105dc5cc811.json b/data/output/005bc076b52c401a579f1da45b0e5105dc5cc811.json new file mode 100644 index 0000000..5296f3c --- /dev/null +++ b/data/output/005bc076b52c401a579f1da45b0e5105dc5cc811.json @@ -0,0 +1,214 @@ +{ + "file_name": "005bc076b52c401a579f1da45b0e5105dc5cc811.txt", + 
"total_words": 261, + "top_n_words": [ + { + "word": "the", + "count": 12 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "madrid", + "count": 6 + }, + { + "word": "of", + "count": 6 + }, + { + "word": "real", + "count": 6 + }, + { + "word": "for", + "count": 5 + }, + { + "word": "in", + "count": 5 + }, + { + "word": "but", + "count": 4 + }, + { + "word": "and", + "count": 3 + } + ], + "sorted_sentences": [ + { + "text": "Real Madrid: .", + "length": 14 + }, + { + "text": "Host commentator .", + "length": 18 + }, + { + "text": "Atletico Madrid: .", + "length": 18 + }, + { + "text": "Click here for the full match report .", + "length": 38 + }, + { + "text": "What a final, what an end to the season...", + "length": 42 + }, + { + "text": "Thanks for following this evening and cheerio.", + "length": 46 + }, + { + "text": "It's taken 12 years but 'la decima' has finally arrived for Real Madrid.", + "length": 72 + }, + { + "text": "'La Decima' is officially marked then, as to a sea of confetti, Real Madrid players lift the Champions League trophy for the tenth time.", + "length": 136 + }, + { + "text": "Europe's biggest club prize has been decided, but there is still another big tournament coming up very soon that you may have heard about.", + "length": 138 + }, + { + "text": "Diego Simeone has calmed a bit to collect his runners-up medal from a smiling Michel Platini, as the Real players and staff follow closely behind.", + "length": 146 + }, + { + "text": "Gareth Bale finally helped Real Madrid fulfil the dream of 'La Decima', securing a memorable 10th Champions League triumph with an extra-time win against neighbours Atletico Madrid.", + "length": 181 + }, + { + "text": "Born in Wales, raised in Southampton and honed at Tottenham, the 24-year-old continued to make light of his world-record fee by scoring a goal that will write him into club history.", + "length": 181 + }, + { + "text": "Bale looked set to pay for a woeful 
first-half miss but instead capped a remarkable first season at Real by heading home to make it 2-1 in extra time, before Carlo Ancelotti's men eased away to a 4-1 win.", + "length": 204 + }, + { + "text": "The Champions League final normally marks the end of the season, but with the World Cup fast approaching, don't forget to follow our live coverage of the finals in Brazil starting in just a couple of weeks.", + "length": 206 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.6720296740531921 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:22.613051507Z", + "first_section_created": "2025-12-23T09:35:22.613392321Z", + "last_section_published": "2025-12-23T09:35:22.61361523Z", + "all_results_received": "2025-12-23T09:35:22.675858846Z", + "output_generated": "2025-12-23T09:35:22.676028053Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:22.613392321Z", + "publish_time": "2025-12-23T09:35:22.61361523Z", + "first_worker_start": "2025-12-23T09:35:22.614183053Z", + "last_worker_end": "2025-12-23T09:35:22.674823Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:22.614135751Z", + "start_time": "2025-12-23T09:35:22.614204054Z", + "end_time": "2025-12-23T09:35:22.614240155Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:22.614405Z", + "start_time": "2025-12-23T09:35:22.614538Z", + "end_time": "2025-12-23T09:35:22.674823Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:22.614158352Z", + "start_time": "2025-12-23T09:35:22.614217655Z", + "end_time": 
"2025-12-23T09:35:22.614267657Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:22.614134351Z", + "start_time": "2025-12-23T09:35:22.614183053Z", + "end_time": "2025-12-23T09:35:22.614197554Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1455, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/005c25fb18e5222631a7fb1ef6e77ec2e8dd317d.json b/data/output/005c25fb18e5222631a7fb1ef6e77ec2e8dd317d.json new file mode 100644 index 0000000..f8a8bfb --- /dev/null +++ b/data/output/005c25fb18e5222631a7fb1ef6e77ec2e8dd317d.json @@ -0,0 +1,460 @@ +{ + "file_name": "005c25fb18e5222631a7fb1ef6e77ec2e8dd317d.txt", + "total_words": 1069, + "top_n_words": [ + { + "word": "the", + "count": 77 + }, + { + "word": "to", + 
"count": 36 + }, + { + "word": "a", + "count": 34 + }, + { + "word": "of", + "count": 26 + }, + { + "word": "and", + "count": 21 + }, + { + "word": "in", + "count": 21 + }, + { + "word": "officer", + "count": 21 + }, + { + "word": "farrell", + "count": 13 + }, + { + "word": "montoya", + "count": 12 + }, + { + "word": "was", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "' The .", + "length": 7 + }, + { + "text": "children.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "by the cop.", + "length": 11 + }, + { + "text": "went public.", + "length": 12 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "” No one should.", + "length": 18 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "22:01 EST, 6 December 2013 .", + "length": 28 + }, + { + "text": "21:58 EST, 6 December 2013 .", + "length": 28 + }, + { + "text": "He storms up to and opens the .", + "length": 31 + }, + { + "text": "The primary officer is then shown .", + "length": 35 + }, + { + "text": "At this point, the teen appears to .", + "length": 36 + }, + { + "text": "officer at this point has had enough.", + "length": 37 + }, + { + "text": "Daily Mail Reporter and Associated Press .", + "length": 42 + }, + { + "text": "His termination was effective at 5pm Friday.", + "length": 44 + }, + { + "text": "children trapped inside the van scream for help.", + "length": 48 + }, + { + "text": "I hope I can help him reach his goals,' Montoya said.", + "length": 53 + }, + { + "text": "'My dad was a minister and things were expected of you.", + "length": 55 + }, + { + "text": "'They matched him with me because he wants to be a cop.", + "length": 55 + }, + { + "text": "Both she and her son are seen being led away in handcuffs.", + "length": 58 + }, + { + "text": "Her 14-year-old son was charged with 
battery of an officer.", + "length": 59 + }, + { + "text": "driver’s side door while ordering the driver out of the vehicle.", + "length": 66 + }, + { + "text": "smashing the right rear passenger window with his baton as petrified .", + "length": 70 + }, + { + "text": "attack the cop as Ms Farrell is at once being pulled back into the van .", + "length": 72 + }, + { + "text": "traffic stop took place south of Taos, New Mexico, on October 28 - but .", + "length": 72 + }, + { + "text": "Cuffed: The officer moves to arrest Ms Farrell in front of her children .", + "length": 73 + }, + { + "text": "by a screaming younger girl and being pulled into the middle of the road .", + "length": 74 + }, + { + "text": "Montoya has 30 days to appeal his firing to a public safety advisory council.", + "length": 77 + }, + { + "text": "At the time, Montoya was a 'model mentor' to a 10-year-old boy in the program.", + "length": 78 + }, + { + "text": "he taught me how to be responsible,' Montoya told the paper of his late father.", + "length": 79 + }, + { + "text": "it sparked local outrage after the video from a state police dashboard camera .", + "length": 79 + }, + { + "text": "She refuses and a struggle ensues as children inside the Ford can be heard screaming.", + "length": 85 + }, + { + "text": "That's it: Having seen enough, the teen jumps out of the car to confront the officer .", + "length": 86 + }, + { + "text": "Appeal: Montoya has 30 days to appeal his firing with the public safety advisory council .", + "length": 90 + }, + { + "text": "'If he can't get something from his dad, I want to be the person he can have confidence in.", + "length": 91 + }, + { + "text": "Ms Farrell, who refers to herself as a 'model parent,' was due to appear in court in April.", + "length": 91 + }, + { + "text": "Footage shows the officer casually walk up to the vehicle and begin speaking with the driver.", + "length": 93 + }, + { + "text": "According to the article, Montoya taught his mentee how 
to play pool and took him to football games.", + "length": 100 + }, + { + "text": "A short chase ensues before the driver again relents and the van makes its way to the side of the road.", + "length": 103 + }, + { + "text": "No word yet on whether the charges against Ms Farrell and her son will be dropped following the officer's dismissal.", + "length": 116 + }, + { + "text": "The motorist, 39-year-old Oriana Farrell, had been stopped for speeding and fled twice after arguing with an officer.", + "length": 117 + }, + { + "text": "The teen and the officer tussle before he runs back into the van as more police arrives to assist the over-matched officer.", + "length": 123 + }, + { + "text": "A 2010 Taos News article called Montoya a 'devoted father' in an article about the local Men Engaged in Nonviolence Program.", + "length": 124 + }, + { + "text": "Sacked: Elias Montoya, the officer that shot at a minivan full of children, was fired Friday from the New Mexico State Police .", + "length": 127 + }, + { + "text": "Ms Farrell does eventually exit the vehicle and gets into a physical confrontation with the officer while he tries to arrest her.", + "length": 129 + }, + { + "text": "The driver’s son, 14, according to ABC News, gets out of the van but thinks better of the idea as the cop pulls a Taser on him.", + "length": 129 + }, + { + "text": "A frantic chase through wrong-way traffic and a small town ensues before Ms Farrell finally gives herself up in front of a hotel.", + "length": 129 + }, + { + "text": "Coming to mom's rescue: At one point in the video, Ms Farrell's 14-year-old son is seen getting out of the vehicle to defend her .", + "length": 130 + }, + { + "text": "When the officer walks away, presumably to run the license and registration and decide whether to write a ticket, the van takes off.", + "length": 132 + }, + { + "text": "Multiple arrests: The teen can be seen led away in handcuffs in the foreground as his mother lie arrested on the sidewalk behind 
him .", + "length": 134 + }, + { + "text": "Mother's instinct: Oriana Farrell, the woman who was driving the minivan, told the Taos News that she was just trying to protect her children .", + "length": 143 + }, + { + "text": "Video from a police cruiser's dashboard camera taken October 28 showed Montoya shooting at the minivan as a Memphis, Tennessee, woman drove away.", + "length": 145 + }, + { + "text": "Montoya's dangerous reaction to Farrell's flight is strange considering he is a father himself, and had been a mentor to young men in a local nonviolence program.", + "length": 162 + }, + { + "text": "The officer was placed on administrative leave earlier this week following an investigation into the shooting outside the northern New Mexico tourist town of Taos.", + "length": 163 + }, + { + "text": "New Mexico State Police spokesman Lt Emmanuel Gutierrez says he confirmed with State Police Chief Pete Kassetas that Officer Elias Montoya is no longer employed by the department.", + "length": 179 + }, + { + "text": "According to ABC News, she was charged with five counts of abuse of a child, aggravated fleeing an officer, resisting an officer, reckless driving and possession of drug paraphernalia.", + "length": 184 + }, + { + "text": "The Taos News' editor said the paper does not usually publish op-ed pieces pertaining to ongoing legal cases, but made an exception in this case because of the huge public interest in the case.", + "length": 193 + }, + { + "text": "The New Mexico State Police officer who fired shots at a minivan full of children during a chaotic October traffic stop has been fired as its revealed that he was a mentor in a nonviolence program.", + "length": 197 + }, + { + "text": "Last month, Ms Farrell had her own account in the Taos News, giving an account of the dramatic traffic stop in October in which she claimed that she was only trying to protect her children from Montoya.", + "length": 202 + }, + { + "text": "'A uniformed officer can shoot three 
bullets at my van and be considered to be “doing his job”, but my doing what I can to get my own children away from such a terrifying individual has been termed “child abuse” and “endangerment,” according to New Mexico law,' she wrote.", + "length": 284 + }, + { + "text": "Calling herself a 'peace officer,' Farrell writes: 'As a single, African-American mother of five in this country, things are tough enough I should not have to endure harassment at the hands of someone who has been hired to protect the citizens of this land over an alleged “speeding offense.", + "length": 293 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5935769975185394 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:23.114819773Z", + "first_section_created": "2025-12-23T09:35:23.115152885Z", + "last_section_published": "2025-12-23T09:35:23.115484698Z", + "all_results_received": "2025-12-23T09:35:23.220192605Z", + "output_generated": "2025-12-23T09:35:23.220431514Z", + "total_processing_time_ms": 105, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 104, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:23.115152885Z", + "publish_time": "2025-12-23T09:35:23.115357093Z", + "first_worker_start": "2025-12-23T09:35:23.115966816Z", + "last_worker_end": "2025-12-23T09:35:23.199031Z", + "total_journey_time_ms": 83, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:23.115963616Z", + "start_time": "2025-12-23T09:35:23.116046419Z", + "end_time": "2025-12-23T09:35:23.116144523Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:23.116219Z", + "start_time": "2025-12-23T09:35:23.116388Z", + "end_time": "2025-12-23T09:35:23.199031Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 82 + }, + "topn": { + 
"worker_type": "topn", + "receive_time": "2025-12-23T09:35:23.115948316Z", + "start_time": "2025-12-23T09:35:23.116020018Z", + "end_time": "2025-12-23T09:35:23.116114322Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:23.115890214Z", + "start_time": "2025-12-23T09:35:23.115966816Z", + "end_time": "2025-12-23T09:35:23.116017018Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:23.115403195Z", + "publish_time": "2025-12-23T09:35:23.115484698Z", + "first_worker_start": "2025-12-23T09:35:23.116098121Z", + "last_worker_end": "2025-12-23T09:35:23.219323Z", + "total_journey_time_ms": 103, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:23.116306129Z", + "start_time": "2025-12-23T09:35:23.116395833Z", + "end_time": "2025-12-23T09:35:23.116419234Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:23.116459Z", + "start_time": "2025-12-23T09:35:23.116627Z", + "end_time": "2025-12-23T09:35:23.219323Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 102 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:23.116080521Z", + "start_time": "2025-12-23T09:35:23.116107522Z", + "end_time": "2025-12-23T09:35:23.116135123Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:23.11605182Z", + "start_time": "2025-12-23T09:35:23.116098121Z", + "end_time": "2025-12-23T09:35:23.116109622Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 184, + "min_processing_ms": 82, + "max_processing_ms": 102, + "avg_processing_ms": 92, + "median_processing_ms": 102, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2986, + "slowest_section_id": 1, + "slowest_section_time_ms": 103 + } +} diff --git a/data/output/005d4294912831a4a6d30fbaab0a50e1d0d72072.json b/data/output/005d4294912831a4a6d30fbaab0a50e1d0d72072.json new file mode 100644 index 0000000..c53313c --- /dev/null +++ b/data/output/005d4294912831a4a6d30fbaab0a50e1d0d72072.json @@ -0,0 +1,258 @@ +{ + "file_name": "005d4294912831a4a6d30fbaab0a50e1d0d72072.txt", + "total_words": 420, + "top_n_words": [ + { + "word": "the", + "count": 29 + }, + { + "word": "in", + "count": 28 + }, + { + "word": "a", + "count": 16 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "saola", + "count": 11 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "are", + "count": 7 + }, + { + "word": "s", + "count": 5 + }, + { + "word": "said", + "count": 5 + }, + { + "word": "species", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "There are no saola in captivity.", + "length": 32 + }, + { + "text": "\"Since 2011, forest guard patrols ...", + "length": 37 + }, + { + "text": "Olinguito: 
The newest rare mammal species .", + "length": 43 + }, + { + "text": "\" New legless lizards found in California .", + "length": 43 + }, + { + "text": "441 species discovered in Amazon since 2010 .", + "length": 45 + }, + { + "text": "In Vietnam, a saola was last seen in the wild in 1998.", + "length": 54 + }, + { + "text": "Scientists discover new species in Australian rainforest .", + "length": 58 + }, + { + "text": "'Chewbacca bat,' other bizarre species found in national park .", + "length": 63 + }, + { + "text": "A WWF survey team found a skull of the animal in a hunter's home.", + "length": 65 + }, + { + "text": "In Laos, a remote camera snapped a picture of one in the wild in 1999.", + "length": 70 + }, + { + "text": "\" \"When our team first looked at the photos we couldn't believe our eyes.", + "length": 73 + }, + { + "text": "\" That moniker comes despite the fact it has two closely spaced parallel horns.", + "length": 79 + }, + { + "text": "Because of that rarity and its elusiveness, the saola is dubbed the \"Asian unicorn.", + "length": 83 + }, + { + "text": "And in 2010, Laotian villagers captured a saola that died before word got to researchers.", + "length": 89 + }, + { + "text": "Saola are the holy grail for Southeast Asian conservationists,\" Van said in a press release.", + "length": 92 + }, + { + "text": "At most a few hundred -- and as few as a couple dozen -- of the animals are thought to exist.", + "length": 93 + }, + { + "text": "Van Ngoc Thinh, WWF-Vietnam's country director, called the picture \"a breath-taking discovery.", + "length": 94 + }, + { + "text": "Environmentalists said Wednesday the pictures show that efforts to save the saola are working.", + "length": 94 + }, + { + "text": "have removed more than 30,000 snares from this critical saola habitat and destroyed more than 600 illegal hunters' camps.", + "length": 121 + }, + { + "text": "The picture of the animal was taken in September in a reserve in the Central Annamite Mountains 
and announced by the WWF on Tuesday.", + "length": 132 + }, + { + "text": "The saola, which is a relative of cattle but looks like an antelope, was first discovered in 1992 in forests along the Vietnam-Laos border.", + "length": 139 + }, + { + "text": "(CNN) -- Environmentalists in Vietnam were ebullient this week after remote cameras in a forest reserve snapped pictures of a live saola, one of the rarest large mammals on Earth.", + "length": 179 + }, + { + "text": "\"Saola are caught in wire snares set by hunters to catch other animals, such as deer and civets, which are largely destined for the lucrative illegal wildlife trade,\" Van said in the WWF release.", + "length": 195 + }, + { + "text": "\"This is an historic moment in Vietnam's efforts to protect our extraordinary biodiversity,\" Dang Dinh Nguyen, deputy head of the country's Quang Nam Forest Protection Department, said in the release.", + "length": 200 + }, + { + "text": "\"These are the most important wild animal photographs taken in Asia, and perhaps the world, in at least the past decade,\" said William Robichaud, coordinator of the Saola Working Group of the International Union for Conservation of Nature's Species Survival Commission, in a World Wildlife Fund press release.", + "length": 309 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.47168222069740295 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:23.616292462Z", + "first_section_created": "2025-12-23T09:35:23.616638075Z", + "last_section_published": "2025-12-23T09:35:23.616802881Z", + "all_results_received": "2025-12-23T09:35:23.681483456Z", + "output_generated": "2025-12-23T09:35:23.681662463Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:23.616638075Z", + "publish_time": "2025-12-23T09:35:23.616802881Z", + 
"first_worker_start": "2025-12-23T09:35:23.617397704Z", + "last_worker_end": "2025-12-23T09:35:23.680566Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:23.617360502Z", + "start_time": "2025-12-23T09:35:23.617437305Z", + "end_time": "2025-12-23T09:35:23.617478607Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:23.617573Z", + "start_time": "2025-12-23T09:35:23.617724Z", + "end_time": "2025-12-23T09:35:23.680566Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:23.6173043Z", + "start_time": "2025-12-23T09:35:23.617420105Z", + "end_time": "2025-12-23T09:35:23.617494308Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:23.617341602Z", + "start_time": "2025-12-23T09:35:23.617397704Z", + "end_time": "2025-12-23T09:35:23.617421005Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + 
"wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2503, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/005d650be8cfb6db3eee51d92188ac64509eb59c.json b/data/output/005d650be8cfb6db3eee51d92188ac64509eb59c.json new file mode 100644 index 0000000..c2d5ca1 --- /dev/null +++ b/data/output/005d650be8cfb6db3eee51d92188ac64509eb59c.json @@ -0,0 +1,420 @@ +{ + "file_name": "005d650be8cfb6db3eee51d92188ac64509eb59c.txt", + "total_words": 956, + "top_n_words": [ + { + "word": "the", + "count": 54 + }, + { + "word": "to", + "count": 23 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "a", + "count": 16 + }, + { + "word": "gaza", + "count": 16 + }, + { + "word": "israel", + "count": 14 + }, + { + "word": "that", + "count": 14 + }, + { + "word": "as", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "Col.", + "length": 4 + }, + { + "text": "IDF spokesman Lt.", + "length": 17 + }, + { + "text": "Ashraf el-Qedra said.", + "length": 21 + }, + { + "text": "Blame game continues .", + "length": 22 + }, + { + "text": "Death toll's rise slows .", + "length": 25 + }, + { + "text": "Life in Gaza: Misery heightened by war .", + "length": 40 + }, + { + "text": "Fishing is a keystone of Gazan livelihoods.", + "length": 43 + }, + { + "text": "Then again, Israel is on the defensive as well.", + "length": 47 + }, + { + "text": "Hamas denied firing rockets into Israel on Friday.", + "length": 50 + }, + { + "text": "\" All this back-and-forth, of course, is nothing new.", + "length": 53 + }, + { + "text": "Gaza conflict: Can economic isolation ever be reversed?", + "length": 55 + }, + { + "text": 
"Yet none of those peacemaking attempts, so far, has stuck.", + "length": 58 + }, + { + "text": "When they want to enforce a cease-fire, they do it very well.", + "length": 61 + }, + { + "text": "\" CNN exclusive: Inside the mind of Hamas' political leader .", + "length": 61 + }, + { + "text": "And there have been some breakthroughs, including a few cease-fires.", + "length": 68 + }, + { + "text": "Nobel laureate Wiesel: Hamas must stop using children as human shields .", + "length": 72 + }, + { + "text": "Besides the violence, another thing that hasn't stopped is the blame game.", + "length": 74 + }, + { + "text": "Israel's military confirmed the strike, saying the two men were militants.", + "length": 74 + }, + { + "text": "Those not agreed upon were few and limited, the ministry said in a statement.", + "length": 77 + }, + { + "text": "The IDF responded by targeting \"some 120 terror sites and nine terror operatives.", + "length": 81 + }, + { + "text": "Israel is resisting in-depth talks as long as rockets continue to head toward its territory.", + "length": 92 + }, + { + "text": "That figure includes at least 30 launched Saturday, of which 24 hit Israel, the military said.", + "length": 94 + }, + { + "text": "Israeli officials say 64 Israeli soldiers have died, and three civilians were killed in Israel.", + "length": 95 + }, + { + "text": "Israeli forces say troops redeployed after completed their mission of destroying Hamas' tunnels.", + "length": 96 + }, + { + "text": "WAFA claimed that Israeli fighter jets struck that mosque and another, killing at least three people.", + "length": 101 + }, + { + "text": "One point of contention: who broke the most recent cease-fire hours before it was supposed to run out?", + "length": 102 + }, + { + "text": "El-Qedra added Saturday night that a 13-year-old girl died in an airstrike on her family home in Rafah.", + "length": 103 + }, + { + "text": "There have been efforts to halt the bloodshed as well as to broach some of 
the thorny issues related to it.", + "length": 107 + }, + { + "text": "Israeli authorities fear Hamas could import weapons by sea and maintains a ship blockade off Gaza's shores.", + "length": 107 + }, + { + "text": "CNN's Jethro Mullen, Ali Younes, Tal Heinrich, Jake Tapper and Samira Said also contributed to this report.", + "length": 107 + }, + { + "text": "IDF, meanwhile says about 900 militants have been killed, which would put the civilian death toll at around 1,000.", + "length": 114 + }, + { + "text": "That was in addition to a 10-year-old boy who died while playing with friends, the Palestinian health ministry said.", + "length": 116 + }, + { + "text": "But Israel was willing to extend fishing rights to only six miles off the coast, said Hamas spokesman Sami Abu Zuhri.", + "length": 117 + }, + { + "text": "Peter Lerner said that that number was a preliminary estimate based on field reports from troops returning from battle.", + "length": 119 + }, + { + "text": "The two groups have been at odds but also made repeated efforts at a unity government, including one earlier this year.", + "length": 119 + }, + { + "text": "Palestinians also wanted Israel to extend Gaza's fishing zone in the Mediterranean from three miles off the coast to 20.", + "length": 120 + }, + { + "text": "Still, there's too much history to show that agreement on some issues will not necessarily lead to a grander breakthrough.", + "length": 122 + }, + { + "text": "According to el-Qedra, at least 1,911 in Gaza have died since the conflict began, in addition to just under 10,000 injured.", + "length": 123 + }, + { + "text": "Israeli airstrikes killed at least five more people in Gaza on Saturday, the official Palestinian news agency WAFA reported.", + "length": 124 + }, + { + "text": "After Gazan rocket fire on Friday, Israel's Foreign Ministry said the country \"will not conduct negotiations while under fire.", + "length": 126 + }, + { + "text": "The area around central Gaza's Qassam mosque, 
in particular, was a frenzy of activity as medical workers sifted through rubble there.", + "length": 133 + }, + { + "text": "The Iron Dome missile defense system has intercepted many of the rockets Gazan militants have fired at populated areas of the country.", + "length": 134 + }, + { + "text": "Less than a mile away from the Qassam mosque, a strike killed two men riding on a motorbike, Palestinian Health Ministry spokesman Dr.", + "length": 134 + }, + { + "text": "The Israel Defense Forces said that, since the end of the cease-fire early Friday, about 100 rockets were fired toward Israel from Gaza.", + "length": 136 + }, + { + "text": "It's unclear how many casualties were militants: The United Nations estimates that about 70% of the dead were civilians, or about 1,340.", + "length": 136 + }, + { + "text": "CNN's John Vause reported from Gaza, Matthew Chance reported from Jerusalem; Ben Brumfield and Greg Botelho wrote and reported from Atlanta.", + "length": 140 + }, + { + "text": "Still, while there weren't scores of dead Saturday as has been true many other days over the past few weeks, the total carnage remains significant.", + "length": 147 + }, + { + "text": "\" Hamas has been in charge of the Palestinian government in Gaza for years, while the Palestinian faction Fatah runs the government in the West Bank.", + "length": 149 + }, + { + "text": "Israeli government spokesman Mark Regev said it was Hamas' fault regardless, telling CNN the group runs Gaza and \"can't outsource terrorism to the other groups.", + "length": 160 + }, + { + "text": "The death toll's climb has slowed since IDF announced overnight into Saturday an end of its ground incursion in Gaza -- even as it continued to strike from the air.", + "length": 164 + }, + { + "text": "The Palestinians have asked for Israel to lift its blockade on Gaza and to re-open the air and seaports, a Palestinian negotiation who spoke on condition of anonymity said.", + "length": 172 + }, + { + "text": "One sad 
irony of all this bloodshed is that -- according to the Egyptian foreign ministry, which brokered recent talks -- the parties have reached an agreement on most issues.", + "length": 175 + }, + { + "text": "Yet militants from Islamic Jihad and the Al-Nasser Salah al-Din Brigades admitted to doing just that -- blaming Israel for refusing to accept their demands during negotiations.", + "length": 176 + }, + { + "text": "Gaza City (CNN) -- With the latest failed cease-fire quickly becoming a distant memory, the two sides in the Israel-Gaza conflict traded rockets and airstrikes Saturday -- as well as blame for not stopping the bloodshed.", + "length": 220 + }, + { + "text": "The Israelis and Palestinians -- particularly Hamas, the Islamic militant group and political party that controls Gaza -- have been at it for weeks, with the former fending off persistent rocket attacks and the latter dealing with relentless Israeli strikes.", + "length": 258 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8257025182247162 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:24.117541542Z", + "first_section_created": "2025-12-23T09:35:24.117833853Z", + "last_section_published": "2025-12-23T09:35:24.118283371Z", + "all_results_received": "2025-12-23T09:35:24.197413098Z", + "output_generated": "2025-12-23T09:35:24.197616506Z", + "total_processing_time_ms": 80, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 79, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:24.117833853Z", + "publish_time": "2025-12-23T09:35:24.118129865Z", + "first_worker_start": "2025-12-23T09:35:24.118699586Z", + "last_worker_end": "2025-12-23T09:35:24.196712Z", + "total_journey_time_ms": 78, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:24.118681486Z", + "start_time": "2025-12-23T09:35:24.118773189Z", + 
"end_time": "2025-12-23T09:35:24.118880293Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:24.118905Z", + "start_time": "2025-12-23T09:35:24.119042Z", + "end_time": "2025-12-23T09:35:24.196712Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 77 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:24.118775689Z", + "start_time": "2025-12-23T09:35:24.118845192Z", + "end_time": "2025-12-23T09:35:24.119347011Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:24.118617883Z", + "start_time": "2025-12-23T09:35:24.118699586Z", + "end_time": "2025-12-23T09:35:24.118740888Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:24.118160066Z", + "publish_time": "2025-12-23T09:35:24.118283371Z", + "first_worker_start": "2025-12-23T09:35:24.118703387Z", + "last_worker_end": "2025-12-23T09:35:24.194123Z", + "total_journey_time_ms": 75, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:24.118727087Z", + "start_time": "2025-12-23T09:35:24.118770789Z", + "end_time": "2025-12-23T09:35:24.11879169Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:24.118968Z", + "start_time": "2025-12-23T09:35:24.119149Z", + "end_time": "2025-12-23T09:35:24.194123Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 74 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:24.118773689Z", + "start_time": "2025-12-23T09:35:24.118832492Z", + "end_time": "2025-12-23T09:35:24.118852092Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": 
"2025-12-23T09:35:24.118670885Z", + "start_time": "2025-12-23T09:35:24.118703387Z", + "end_time": "2025-12-23T09:35:24.118708287Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 151, + "min_processing_ms": 74, + "max_processing_ms": 77, + "avg_processing_ms": 75, + "median_processing_ms": 77, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2848, + "slowest_section_id": 0, + "slowest_section_time_ms": 78 + } +} diff --git a/data/output/005d8918b7ecc2f2061204457fa723fd5d721176.json b/data/output/005d8918b7ecc2f2061204457fa723fd5d721176.json new file mode 100644 index 0000000..cb869b8 --- /dev/null +++ b/data/output/005d8918b7ecc2f2061204457fa723fd5d721176.json @@ -0,0 +1,596 @@ +{ + "file_name": "005d8918b7ecc2f2061204457fa723fd5d721176.txt", + "total_words": 905, + "top_n_words": [ + { + "word": "the", + "count": 29 + }, + { + "word": "a", + "count": 24 + }, + { + "word": "and", + "count": 21 + }, + { + "word": "he", + "count": 21 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "of", + 
"count": 18 + }, + { + "word": "but", + "count": 17 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "s", + "count": 17 + }, + { + "word": "his", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "5 .", + "length": 3 + }, + { + "text": "5 .", + "length": 3 + }, + { + "text": "5 .", + "length": 3 + }, + { + "text": "5 .", + "length": 3 + }, + { + "text": "5 .", + "length": 3 + }, + { + "text": "5 .", + "length": 3 + }, + { + "text": "5/110 .", + "length": 7 + }, + { + "text": "Love him.", + "length": 9 + }, + { + "text": "A maestro.", + "length": 10 + }, + { + "text": "Joe Hart - 9 .", + "length": 14 + }, + { + "text": "Edin Dzeko - 8 .", + "length": 16 + }, + { + "text": "TEAM TOTAL - 87.", + "length": 16 + }, + { + "text": "Yaya Toure - 9 .", + "length": 16 + }, + { + "text": "Gael Clichy - 7.", + "length": 16 + }, + { + "text": "David Silva - 9 .", + "length": 17 + }, + { + "text": "Samir Nasri - 8 .", + "length": 17 + }, + { + "text": "Fernandinho - 8 .", + "length": 17 + }, + { + "text": "Santi Cazorla - 8.", + "length": 18 + }, + { + "text": "Aaron Ramsey - 9 .", + "length": 18 + }, + { + "text": "Mikel Arteta - 8 .", + "length": 18 + }, + { + "text": "Kieran Gibbs - 7 .", + "length": 18 + }, + { + "text": "Danny Welbeck - 8 .", + "length": 19 + }, + { + "text": "Arsene Wenger - 8 .", + "length": 19 + }, + { + "text": "Jack Wilshere - 8 .", + "length": 19 + }, + { + "text": "Sergio Aguero - 9 .", + "length": 19 + }, + { + "text": "Calum Chambers - 7.", + "length": 19 + }, + { + "text": "Pablo Zabaleta - 9 .", + "length": 20 + }, + { + "text": "Mathieu Debuchy - 7.", + "length": 20 + }, + { + "text": "Alexis Sanchez - 9 .", + "length": 20 + }, + { + "text": "Vincent Kompany - 9 .", + "length": 21 + }, + { + "text": "TEAM TOTAL - 93/110 .", + "length": 21 + }, + { + "text": "Martin Demichelis - 7.", + "length": 22 + }, + { + "text": "Laurent Koscielny - 8 .", + "length": 23 + }, + { + "text": "Manuel Pellegrini - 8 .", + "length": 23 + }, + { 
+ "text": "Has to be on his A game.", + "length": 24 + }, + { + "text": "Monster of a midfielder.", + "length": 24 + }, + { + "text": "Power off the bench - 8.", + "length": 24 + }, + { + "text": "Power off the bench - 8 .", + "length": 25 + }, + { + "text": "I could watch him all day.", + "length": 26 + }, + { + "text": "Wojciech Szczesny - 7/10 .", + "length": 26 + }, + { + "text": "No longer just a super sub.", + "length": 27 + }, + { + "text": "lf he's fit, he has to play.", + "length": 28 + }, + { + "text": "Plays with energy and passion.", + "length": 30 + }, + { + "text": "Underestimated but very talented.", + "length": 33 + }, + { + "text": "The antithesis of Roberto Mancini.", + "length": 34 + }, + { + "text": "Wilshere offers more going forward.", + "length": 35 + }, + { + "text": "Has technique, balance and bravery.", + "length": 35 + }, + { + "text": "With a good run, he's a goal-a-game man.", + "length": 40 + }, + { + "text": "Has balance, poise and genius in his feet.", + "length": 42 + }, + { + "text": "A run of games without injuries would help.", + "length": 43 + }, + { + "text": "Enjoys defending and plays with aggression.", + "length": 43 + }, + { + "text": "Looks like he's been at Arsenal a long time.", + "length": 44 + }, + { + "text": "One of the best in the world, barring injuries.", + "length": 47 + }, + { + "text": "Rotates with Aleksandar Kolarov so stays fresh.", + "length": 47 + }, + { + "text": "Lots of competition but such an important player.", + "length": 49 + }, + { + "text": "Lovely left back but still to prove he's top-class.", + "length": 51 + }, + { + "text": "With Eliaquim Mangala around, he has to keep it up.", + "length": 51 + }, + { + "text": "Excellent defensive options in Kolarov and Mangala.", + "length": 51 + }, + { + "text": "Not much between the two but Clichy can be vulnerable.", + "length": 54 + }, + { + "text": "If he needs motivation, he got it from Louis van Gaal.", + "length": 54 + }, + { + "text": "I don't know 
what his best foot is but he's a magician.", + "length": 55 + }, + { + "text": "Poor result against Stoke, so he will not want another.", + "length": 55 + }, + { + "text": "Will need some earplugs because he'll get plenty of stick.", + "length": 58 + }, + { + "text": "Sportsmail's Jamie Redknapp evaluates each team to decide.", + "length": 58 + }, + { + "text": "No excuses now — needs to get in good positions and score.", + "length": 60 + }, + { + "text": "He's got pace, is tenacious in his defending and reads it well.", + "length": 63 + }, + { + "text": "City have not lost two straight league games since October 2010.", + "length": 64 + }, + { + "text": "Mathieu Flamini did well in this game last year so he could play.", + "length": 65 + }, + { + "text": "Bit of a laughing stock when he arrived but answered his critics.", + "length": 65 + }, + { + "text": "Great credentials and a big goal against Besiktas to get started.", + "length": 65 + }, + { + "text": "Yet to hit the heights of last year but it's only a matter of time.", + "length": 67 + }, + { + "text": "Doesn't fall out with his senior players and looks to get them on side.", + "length": 71 + }, + { + "text": "Epitomises everything good about foreign players in the Premier League.", + "length": 71 + }, + { + "text": "VIDEO Scroll down to watch Sportsmail's Arsenal vs Manchester City preview .", + "length": 76 + }, + { + "text": "He drives this team forward and has become one of Europe's best midfielders.", + "length": 76 + }, + { + "text": "Improving under Manuel Pellegrini and playing with the confidence strikers need.", + "length": 80 + }, + { + "text": "A disappointing World Cup but comfortable on the ball and a good minder for Toure.", + "length": 82 + }, + { + "text": "I always get the feeling he can do something erratic but he has improved immensely.", + "length": 83 + }, + { + "text": "It's sure to be an intriguing battle at the Emirates, but who will come out on top?", + "length": 83 + }, + { + 
"text": "Samir Nasri will face a hostile atmosphere when he returns to his former club Arsenal .", + "length": 87 + }, + { + "text": "The pressure is off after winning the FA Cup but he still needs midfield and defensive cover.", + "length": 93 + }, + { + "text": "Technically fantastic and vital as captain, but in big games his lack of pace can show him up.", + "length": 94 + }, + { + "text": "Joe Hart (left) makes a brave punch during the 3-1 win against Manchester City at the Etihad .", + "length": 94 + }, + { + "text": "Great start to his Arsenal career and so mature for his age, but today will be his biggest test.", + "length": 96 + }, + { + "text": "Yaya Toure is the driving force in Manchester City's midfield but he has not found top form yet .", + "length": 97 + }, + { + "text": "Mathieu Debuchy has settled in quickly at Arsenal after joining from Newcastle during the summer .", + "length": 98 + }, + { + "text": "Jack Wilshere should be in confident mood after anchoring England's midfield against Switzerland .", + "length": 98 + }, + { + "text": "Overcame difficulties last season but impressive for England and now in the top five in the world.", + "length": 98 + }, + { + "text": "Can struggle with injuries but a colossus and one of the best centre backs in Premier League history.", + "length": 101 + }, + { + "text": "Laurent Koscielny is certain to start in the heart of the Gunners' back four against Manchester City .", + "length": 102 + }, + { + "text": "Captain Vincent Kompany will be aiming for a clean sheet when his side visit the Emirates on Saturday .", + "length": 103 + }, + { + "text": "Alexis Sanchez already has two Arsenal goals to his name after arriving from Barcelona for £30million .", + "length": 104 + }, + { + "text": "Danny Welbeck looks set to make his debut against Manchester City after joining Arsenal on deadline day .", + "length": 105 + }, + { + "text": "Danny Welbeck is a good signing and Wenger is now one of our greatest hopes for 
producing good English players.", + "length": 111 + }, + { + "text": "stretch a game or James Milner can shore it up, but with Stevan Jovetic out they are light on attacking options.", + "length": 112 + }, + { + "text": "Sergio Aguero bends the ball beyond Tim Krul to seal a 2-0 win at Newcastle in City's first game of the season .", + "length": 112 + }, + { + "text": "Per Mertesacker and Flamini provide plenty of defensive experience while Mesut Ozil, if he's fit, is capable of moments of magic if Wenger needs to get creative.", + "length": 161 + }, + { + "text": "Arsenal host Manchester City in Saturday's lunchtime kick-off, looking to avenge their humiliating 6-3 defeat by Manuel Pellegrini's side at the Etihad last season.", + "length": 164 + }, + { + "text": "The Gunners are unbeaten after a win against Crystal Palace and draws with Everton and Leicester, while the champions suffered a surprise home loss to Stoke City last time out.", + "length": 176 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4602062851190567 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:24.619057033Z", + "first_section_created": "2025-12-23T09:35:24.621048109Z", + "last_section_published": "2025-12-23T09:35:24.621483226Z", + "all_results_received": "2025-12-23T09:35:24.695370653Z", + "output_generated": "2025-12-23T09:35:24.69556636Z", + "total_processing_time_ms": 76, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 73, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:24.621048109Z", + "publish_time": "2025-12-23T09:35:24.621349421Z", + "first_worker_start": "2025-12-23T09:35:24.621830139Z", + "last_worker_end": "2025-12-23T09:35:24.694391Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:24.62184724Z", + "start_time": 
"2025-12-23T09:35:24.621916042Z", + "end_time": "2025-12-23T09:35:24.622009446Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:24.622135Z", + "start_time": "2025-12-23T09:35:24.62226Z", + "end_time": "2025-12-23T09:35:24.694391Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:24.621754736Z", + "start_time": "2025-12-23T09:35:24.621844139Z", + "end_time": "2025-12-23T09:35:24.621968944Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:24.621763836Z", + "start_time": "2025-12-23T09:35:24.621830139Z", + "end_time": "2025-12-23T09:35:24.621885341Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:24.621414423Z", + "publish_time": "2025-12-23T09:35:24.621483226Z", + "first_worker_start": "2025-12-23T09:35:24.621830139Z", + "last_worker_end": "2025-12-23T09:35:24.657003Z", + "total_journey_time_ms": 35, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:24.621970744Z", + "start_time": "2025-12-23T09:35:24.622014146Z", + "end_time": "2025-12-23T09:35:24.622019646Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:24.622209Z", + "start_time": "2025-12-23T09:35:24.622307Z", + "end_time": "2025-12-23T09:35:24.657003Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 34 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:24.621883241Z", + "start_time": "2025-12-23T09:35:24.621906742Z", + "end_time": "2025-12-23T09:35:24.621913442Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + 
"receive_time": "2025-12-23T09:35:24.621784737Z", + "start_time": "2025-12-23T09:35:24.621830139Z", + "end_time": "2025-12-23T09:35:24.621833039Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 106, + "min_processing_ms": 34, + "max_processing_ms": 72, + "avg_processing_ms": 53, + "median_processing_ms": 72, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2553, + "slowest_section_id": 0, + "slowest_section_time_ms": 73 + } +} diff --git a/data/output/005dd61a80fd00399e30ddf99a64a50d5feda585.json b/data/output/005dd61a80fd00399e30ddf99a64a50d5feda585.json new file mode 100644 index 0000000..d20aa48 --- /dev/null +++ b/data/output/005dd61a80fd00399e30ddf99a64a50d5feda585.json @@ -0,0 +1,318 @@ +{ + "file_name": "005dd61a80fd00399e30ddf99a64a50d5feda585.txt", + "total_words": 593, + "top_n_words": [ + { + "word": "the", + "count": 42 + }, + { + "word": "in", + "count": 25 + }, + { + "word": "from", + "count": 18 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "cent", + "count": 12 + }, + { 
+ "word": "million", + "count": 12 + }, + { + "word": "per", + "count": 12 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "london", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "3% .", + "length": 4 + }, + { + "text": "4% .", + "length": 4 + }, + { + "text": "9% .", + "length": 4 + }, + { + "text": "9% .", + "length": 4 + }, + { + "text": "3% .", + "length": 4 + }, + { + "text": "Just 3.", + "length": 7 + }, + { + "text": "Arab: 1.", + "length": 8 + }, + { + "text": "Asian: 18.", + "length": 10 + }, + { + "text": "Black: 13.", + "length": 10 + }, + { + "text": "5 per cent.", + "length": 11 + }, + { + "text": "Mixed race: 5% .", + "length": 16 + }, + { + "text": "Other white: 14.", + "length": 16 + }, + { + "text": "5million in 1971.", + "length": 17 + }, + { + "text": "White British: 44.", + "length": 18 + }, + { + "text": "2 million, up from 7.", + "length": 21 + }, + { + "text": "6 million living there today.", + "length": 29 + }, + { + "text": "According to the 2011 census .", + "length": 30 + }, + { + "text": "3million in 2001, and making up 44.", + "length": 35 + }, + { + "text": "9 per cent of the city's population.", + "length": 36 + }, + { + "text": "The largest decline was in Newham, East London, where the decrease was 37.", + "length": 74 + }, + { + "text": "According to the national census from 2011 the capital's population was 8.", + "length": 74 + }, + { + "text": "Five London boroughs saw the proportion of white Britons fall by more than a quarter.", + "length": 85 + }, + { + "text": "7million Londoners described their ethnicity as 'White British' in 2011 - down from 4.", + "length": 86 + }, + { + "text": "The number of foreign-born people living in London will outnumber native Britons by 2031, it was revealed today.", + "length": 112 + }, + { + "text": "' Research by The Times has revealed the likely change in London's population by 2031 and used previous studies.", + "length": 112 + 
}, + { + "text": "In 1971 this figure was at more than six million but this is likely to sink below five million in the coming decades.", + "length": 117 + }, + { + "text": "Since 1951 the number of Londoners from Africa has rocketed from 90,000 to 620,000, with people from Nigeria making up 100,000 of those.", + "length": 136 + }, + { + "text": "'It is time that the political class woke up to the implications for the future of our society before they find their credibility in shreds.", + "length": 140 + }, + { + "text": "The 2011 census also revealed other social changes in London, including that white British people became a minority in London for the first time.", + "length": 145 + }, + { + "text": "'On current trends, the UK-born will be in a minority in their own capital within 20 years, despite strong public opposition to mass immigration.", + "length": 145 + }, + { + "text": "In Barking and Dagenham, on the East London/Essex border, 80 per cent of residents were white British in 2001 but by 2011 the proportion was 49 per cent.", + "length": 153 + }, + { + "text": "In the same period those originally from Asia and the Middle East went from 180,000 to 970,000 - with people originally from India making up 155,000 of those.", + "length": 158 + }, + { + "text": "But while the city's immigrant population will continue to rise sharply in the coming decades, the number of British-born people will continue to slowly decline.", + "length": 161 + }, + { + "text": "Tipping point: New research has revealed that the immigrant population of London will reach at least five million in 16 years - and outnumber British-born residents .", + "length": 166 + }, + { + "text": "Immigration groups have said that the situation will be 'totally unacceptable' to many voters while political parties say it will but put even more pressure on public services.", + "length": 176 + }, + { + "text": "New research has found the rise of non-British born Londoners will take the city's total 
population to more than 10 million in 2031 and 11 million in 2041 - compared with the 8.", + "length": 177 + }, + { + "text": "Of the three million foreign-born residents 40 per cent were from Europe, 30 per cent from the Middle East and Asia, 20 per cent from Africa and 10 per cent from America or the Caribbean.", + "length": 187 + }, + { + "text": "Vice chairman of MigrationWatch UK Alp Mehmet said: 'The public understand the enormous effect that the present rate of immigration is having on many of our cities and especially on London.", + "length": 189 + }, + { + "text": "The immigrant population of the capital will reach at least five million in 16 years - having more than doubled from one million in 1971 to three million in 2011, when the last census was carried out.", + "length": 200 + }, + { + "text": "Picture of Britain: Of the three million foreign-born residents in London in 2011 40 per cent were from Europe, 30 per cent from the Middle East and Asia, 20 per cent from Africa and 10 per cent from America or the Caribbean .", + "length": 227 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5588809251785278 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:25.122253788Z", + "first_section_created": "2025-12-23T09:35:25.122622402Z", + "last_section_published": "2025-12-23T09:35:25.122773108Z", + "all_results_received": "2025-12-23T09:35:25.183163919Z", + "output_generated": "2025-12-23T09:35:25.183325625Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:25.122622402Z", + "publish_time": "2025-12-23T09:35:25.122773108Z", + "first_worker_start": "2025-12-23T09:35:25.123328429Z", + "last_worker_end": "2025-12-23T09:35:25.182196Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + 
"receive_time": "2025-12-23T09:35:25.123278627Z", + "start_time": "2025-12-23T09:35:25.123328429Z", + "end_time": "2025-12-23T09:35:25.123383831Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:25.123581Z", + "start_time": "2025-12-23T09:35:25.123731Z", + "end_time": "2025-12-23T09:35:25.182196Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:25.123297428Z", + "start_time": "2025-12-23T09:35:25.12336453Z", + "end_time": "2025-12-23T09:35:25.123439933Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:25.123279027Z", + "start_time": "2025-12-23T09:35:25.12335293Z", + "end_time": "2025-12-23T09:35:25.123388031Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3330, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git a/data/output/005e2197c96428e9b985f970449c55d9c1e8115d.json b/data/output/005e2197c96428e9b985f970449c55d9c1e8115d.json new file mode 100644 index 0000000..7b14bcc --- /dev/null +++ b/data/output/005e2197c96428e9b985f970449c55d9c1e8115d.json @@ -0,0 +1,218 @@ +{ + "file_name": "005e2197c96428e9b985f970449c55d9c1e8115d.txt", + "total_words": 328, + "top_n_words": [ + { + "word": "the", + "count": 23 + }, + { + "word": "his", + "count": 12 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "26", + "count": 6 + }, + { + "word": "and", + "count": 6 + }, + { + "word": "for", + "count": 6 + }, + { + "word": "of", + "count": 6 + }, + { + "word": "beijing", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "A senior officer told CCTV News that Weidong’s daily training contributed to his achievement.", + "length": 95 + }, + { + "text": "Mao Weidong set a new Guinness World Record for the longest time in an abdominal plank position .", + "length": 98 + }, + { + "text": "A Chinese policeman smashed the planking world record with a staggering four hours and 26 minutes .", + "length": 99 + }, + { + "text": "The Beijing man planned to finish when he did because the numbers match his wife’s birthday, April 26 .", + "length": 105 + }, + { + "text": "It is believed the Beijing man planned to finish when he did because the numbers match his wife’s birthday, April 26.", + "length": 119 + }, + { + "text": "Mao Weidong planked his way to the Guinness World Record for the longest time in an abdominal plank position on September 26.", + "length": 125 + }, + { + "text": "Mao Weidong planked his way to the Guinness World Record for the longest time 
in an abdominal plank position on September 26 .", + "length": 126 + }, + { + "text": "Weidong is seen holding his position on a podium in front of spectators, while he is dripping sweat from every part of his body.", + "length": 128 + }, + { + "text": "But a Chinese policeman, from Beijing, has recently smashed the planking world record with a staggering four hours and 26 minutes.", + "length": 131 + }, + { + "text": "The Beijing SWAT team member broke the previous mark of three hours and seven minutes, which was set by American athlete George Hood in 2013.", + "length": 141 + }, + { + "text": "The Beijing SWAT team member broke the previous mark of three hours and seven minutes, which was set by American athlete George Hood in 2013 .", + "length": 142 + }, + { + "text": "An average Joe might be able to hold the plank for up to a minute while others would be lucky of making it to four before crumpling over in defeat.", + "length": 147 + }, + { + "text": "The plank is designed to sculpt a flat stomach, strengthen the entire core, provide support for the entire body in everyday movements, reduce back pain and improve posture.", + "length": 172 + }, + { + "text": "The footage shows the 43-year-old with his elbows bent 90 degrees and the rest of his weight on his forearms on the ground, forming a straight line from his head to feet while clenching his hands.", + "length": 196 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5134047269821167 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:25.623573271Z", + "first_section_created": "2025-12-23T09:35:25.623820581Z", + "last_section_published": "2025-12-23T09:35:25.623961386Z", + "all_results_received": "2025-12-23T09:35:25.689268885Z", + "output_generated": "2025-12-23T09:35:25.68939779Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + 
"section_id": 0, + "creation_time": "2025-12-23T09:35:25.623820581Z", + "publish_time": "2025-12-23T09:35:25.623961386Z", + "first_worker_start": "2025-12-23T09:35:25.624528408Z", + "last_worker_end": "2025-12-23T09:35:25.688334Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:25.624520207Z", + "start_time": "2025-12-23T09:35:25.62459891Z", + "end_time": "2025-12-23T09:35:25.624634112Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:25.62478Z", + "start_time": "2025-12-23T09:35:25.624928Z", + "end_time": "2025-12-23T09:35:25.688334Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:25.624466305Z", + "start_time": "2025-12-23T09:35:25.624528408Z", + "end_time": "2025-12-23T09:35:25.62459321Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:25.624505007Z", + "start_time": "2025-12-23T09:35:25.62459701Z", + "end_time": "2025-12-23T09:35:25.624619911Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1861, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/005e57bb13a0fde59f27fa295dc4de9b8e8aee1e.json b/data/output/005e57bb13a0fde59f27fa295dc4de9b8e8aee1e.json new file mode 100644 index 0000000..c3b50e4 --- /dev/null +++ b/data/output/005e57bb13a0fde59f27fa295dc4de9b8e8aee1e.json @@ -0,0 +1,274 @@ +{ + "file_name": "005e57bb13a0fde59f27fa295dc4de9b8e8aee1e.txt", + "total_words": 574, + "top_n_words": [ + { + "word": "a", + "count": 25 + }, + { + "word": "the", + "count": 21 + }, + { + "word": "of", + "count": 19 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "they", + "count": 14 + }, + { + "word": "said", + "count": 12 + }, + { + "word": "people", + "count": 9 + }, + { + "word": "hungover", + "count": 8 + }, + { + "word": "more", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "uk.", + "length": 3 + }, + { + "text": "org.", + "length": 4 + }, + { + "text": "Recovering from a big night out?", + "length": 32 + }, + { + "text": "' To find out more about the campaign visit gosober.", + "length": 52 + }, + { + "text": "The average British person spend 315 days a year hungover.", + "length": 58 + }, + { + "text": "A quarter said if they weren't hungover they'd have more sex .", + "length": 62 + }, + { + "text": "The ancient book also advises eating cabbage before going out drinking.", + "length": 71 + }, + { + "text": "A hangover also prevented people from taking part in more edifying activities.", + "length": 78 + }, + { + "text": "Half of 
respondents to the survey complained that being hungover was a waste of time.", + "length": 85 + }, + { + "text": "Many surveyed said they had missed dates, job interviews and even funerals as a result .", + "length": 88 + }, + { + "text": "The average British person spends almost a year of their lives nursing a hangover, a charity has said.", + "length": 102 + }, + { + "text": "The study suggests that being hungover could jeopardise your chance of finding romance or getting your dream job.", + "length": 113 + }, + { + "text": "The money raised will provide vital funds to support people affected by cancer so they don't have to face it alone.", + "length": 115 + }, + { + "text": "Kashk is combination of fermented yoghurt, milk and whey, and is common in Iranian, Turkish, Balkan and Arabic food.", + "length": 116 + }, + { + "text": "The charity surveyed 2,000 British adults and found that one in 14 will have more than 3,000 hangovers in their lifetime.", + "length": 121 + }, + { + "text": "The 1,000-year-old Iraqi dish involves cooking meat with vegetables and spices, and then adding something known as 'kashk'.", + "length": 123 + }, + { + "text": "Of those who complained about a hangover being a waste of time, a quarter said if they weren't hungover they'd have more sex.", + "length": 125 + }, + { + "text": "The charity is running a 'Go Sober for October' fundraising campaign to encourage people to give up drinking and raise money.", + "length": 125 + }, + { + "text": "It calculated the figures by multiplying the average amount of time people spend hungover each month with their life expectancy.", + "length": 128 + }, + { + "text": "Forget hair of the dog, an Iraqi stew from a 1,000-year-old Middle Eastern recipe book claims to be the 'ultimate hangover cure'.", + "length": 130 + }, + { + "text": "Women's hangovers appear to last longer - with the average lasting nine hours - compared to a seven hour hangover suffered by men.", + "length": 130 + }, + { + "text": 
"Macmillan Cancer Support has estimated that people spend 315 days of their lives battling with headaches and nausea caused by drinking.", + "length": 135 + }, + { + "text": "A further 16 per cent said they'd learn a language, 41 per cent said they'd exercise more and 13 per cent said they'd learn an instrument.", + "length": 138 + }, + { + "text": "One in every 13 of those surveyed said they had missed a first date because they were too hungover and one in ten said they had missed a job interview.", + "length": 151 + }, + { + "text": "Shamefully, eight per cent of those surveyed admitted they had missed a wedding thanks to a hangover, and six per cent admitted they had missed a funeral.", + "length": 154 + }, + { + "text": "Over half said the most annoying aspect of being hungover was that it made them feel sick while more than a quarter said wasting time was their biggest bugbear.", + "length": 160 + }, + { + "text": "'That's why we're asking people to sign up for Macmillan's Go Sober fundraising event, abstain from drinking alcohol for the month of October and ask family and friends to sponsor them.", + "length": 185 + }, + { + "text": "'This research shows hangovers are a waste of time and are causing people to miss out on everything from romance to their dream job,' said Hannah Redmond, head of national events marketing for Macmillan Cancer Support.", + "length": 218 + }, + { + "text": "The new poll revealed a north-south divide with the frequency of hangovers, with 22 per cent of people from the north likely to have more than four hangovers each month compared to 15 per cent of people from the south.", + "length": 219 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8262021541595459 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:26.125112363Z", + "first_section_created": "2025-12-23T09:35:26.126796927Z", + "last_section_published": "2025-12-23T09:35:26.126991935Z", + "all_results_received": 
"2025-12-23T09:35:26.188610292Z", + "output_generated": "2025-12-23T09:35:26.188788999Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:26.126796927Z", + "publish_time": "2025-12-23T09:35:26.126991935Z", + "first_worker_start": "2025-12-23T09:35:26.127514355Z", + "last_worker_end": "2025-12-23T09:35:26.187707Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:26.127496054Z", + "start_time": "2025-12-23T09:35:26.127583257Z", + "end_time": "2025-12-23T09:35:26.12764596Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:26.127669Z", + "start_time": "2025-12-23T09:35:26.127816Z", + "end_time": "2025-12-23T09:35:26.187707Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:26.127447852Z", + "start_time": "2025-12-23T09:35:26.127520055Z", + "end_time": "2025-12-23T09:35:26.127607058Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:26.127449952Z", + "start_time": "2025-12-23T09:35:26.127514355Z", + "end_time": "2025-12-23T09:35:26.127579257Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, 
+ "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3246, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/005e9a7fcac19db7885eead541155c3d5d9a9f88.json b/data/output/005e9a7fcac19db7885eead541155c3d5d9a9f88.json new file mode 100644 index 0000000..9b0e80b --- /dev/null +++ b/data/output/005e9a7fcac19db7885eead541155c3d5d9a9f88.json @@ -0,0 +1,310 @@ +{ + "file_name": "005e9a7fcac19db7885eead541155c3d5d9a9f88.txt", + "total_words": 619, + "top_n_words": [ + { + "word": "to", + "count": 20 + }, + { + "word": "the", + "count": 19 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "s", + "count": 12 + }, + { + "word": "she", + "count": 12 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "melissa", + "count": 11 + }, + { + "word": "be", + "count": 10 + }, + { + "word": "her", + "count": 10 + }, + { + "word": "of", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "It’s such a long time.", + "length": 24 + }, + { + "text": "I wish you and Dad had been there.", + "length": 34 + }, + { + "text": "That’s what it will be for ever more.", + "length": 39 + }, + { + "text": "We’re hoping that she’ll be out sooner.", + "length": 43 + }, + { + "text": "At least I’ve got Michaella here with me.", + "length": 43 + }, + { + "text": 
"When can they be transferred to a UK prison?", + "length": 44 + }, + { + "text": "We’re the drug mule’s father and mother.", + "length": 44 + }, + { + "text": "5 million of cocaine out of Peru in August.", + "length": 45 + }, + { + "text": "How long do they actually have to serve in Peru?", + "length": 48 + }, + { + "text": "She’s always going to be known as the drug mule.", + "length": 50 + }, + { + "text": "They must still not be able to sleep easy at night.", + "length": 51 + }, + { + "text": "They are shown attending a court hearing last month .", + "length": 53 + }, + { + "text": "We’ve been told different things by different people.", + "length": 55 + }, + { + "text": "She told her: ‘I really don’t think it has sunk in yet.", + "length": 59 + }, + { + "text": "‘They’re first-time offenders and they didn’t instigate it.", + "length": 65 + }, + { + "text": "‘Melissa lost two grandparents this year before all this happened.", + "length": 68 + }, + { + "text": "Melissa Reid, 20, appears in a Peruvian court in September this year.", + "length": 69 + }, + { + "text": "‘Those years she’ll miss are supposed to be the time of your life.", + "length": 70 + }, + { + "text": "It’s when you settle down and meet your partner, have fun with friends.", + "length": 73 + }, + { + "text": "'The other people involved – the bigger players – must be watching all this.", + "length": 80 + }, + { + "text": "Mr Reid, from Lenzie, near Glasgow, added: ‘This stigmatises Melissa for the rest of her life.", + "length": 96 + }, + { + "text": "She and friend Michaella McCollumn have been sentenced to six years and seven months in prison .", + "length": 96 + }, + { + "text": "Breaking down in tears, she added: ‘I’m just frightened about what’s going to happen next.", + "length": 96 + }, + { + "text": "She’s got two elderly grandparents now  – personally, I don’t think she’ll  ever see them again.", + "length": 106 + }, + { + "text": "Melissa’s father, Billy, 53, said:  ‘We think the 
sentence is too harsh and doesn’t sit with the crime.", + "length": 110 + }, + { + "text": "Debbie Reid, who is pictured after flying out to Peru to support her daughter claims that the sentence is too long .", + "length": 116 + }, + { + "text": "’ Melissa, 20, revealed her torment in an emotional phone call to her mother, Debbie, the morning after she was sentenced.", + "length": 124 + }, + { + "text": "Mr Reid said: ‘I thought it might bring some kind of closure but it’s just opened up more issues and brought uncertainty.", + "length": 125 + }, + { + "text": "’ Sentence: Miss McCollum, right, and Miss Reid, left, were given sentences of six years and eight months for drug trafficking.", + "length": 129 + }, + { + "text": "The Mail on Sunday understands the British women are not eligible for a sentence reduction, meaning they will serve the full term.", + "length": 130 + }, + { + "text": "‘I’m trying to think she will still be a young woman when she’s released and those things will happen for  her but it’s such a long time.", + "length": 146 + }, + { + "text": "They originally claimed to have been kidnapped  by a gang in Ibiza and forced to travel to Peru, but later accepted responsibility and pleaded guilty.", + "length": 151 + }, + { + "text": "’ Melissa and Michaella, from Dungannon in Northern Ireland, were arrested trying to board a plane bound for Spain with 11kg of cocaine in their suitcases.", + "length": 157 + }, + { + "text": "’ The family are not appealing against the decision and are clinging to hopes that Melissa may one day be transferred back to the UK to serve part of her sentence.", + "length": 165 + }, + { + "text": "Melissa and fellow mule Michaella McCollum Connolly were last week jailed for six years eight months – the minimum term – after being caught trying to smuggle £1.", + "length": 167 + }, + { + "text": "Drug mule Melissa Reid’s prison sentence for smuggling cocaine is too harsh, her parents claimed last night, as they revealed their 
fears that she will spend the best years of her life behind bars.", + "length": 199 + }, + { + "text": "’ Happier times: Melissa Reid (bottom left) pictured with her mother Debbie, and 18-year-old twin sisters Jennifer (top right) and Stephanie (bottom right) Mrs Reid, 53, added: ‘When you think of six years eight months, Melissa is going to be 27 before the sentence is complete.", + "length": 282 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8688820600509644 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:26.627352881Z", + "first_section_created": "2025-12-23T09:35:26.627714995Z", + "last_section_published": "2025-12-23T09:35:26.627900902Z", + "all_results_received": "2025-12-23T09:35:26.690653303Z", + "output_generated": "2025-12-23T09:35:26.690783308Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:26.627714995Z", + "publish_time": "2025-12-23T09:35:26.627900902Z", + "first_worker_start": "2025-12-23T09:35:26.62837032Z", + "last_worker_end": "2025-12-23T09:35:26.687722Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:26.62838102Z", + "start_time": "2025-12-23T09:35:26.628441323Z", + "end_time": "2025-12-23T09:35:26.628535626Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:26.628571Z", + "start_time": "2025-12-23T09:35:26.628701Z", + "end_time": "2025-12-23T09:35:26.687722Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:26.628295317Z", + "start_time": "2025-12-23T09:35:26.628382721Z", + "end_time": "2025-12-23T09:35:26.628474824Z", + "queue_wait_time_ms": 
0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:26.628297117Z", + "start_time": "2025-12-23T09:35:26.62837032Z", + "end_time": "2025-12-23T09:35:26.628408422Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3511, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/005ed10e2fa927c69aa9d881c51ccca9ce2c789c.json b/data/output/005ed10e2fa927c69aa9d881c51ccca9ce2c789c.json new file mode 100644 index 0000000..5c9b761 --- /dev/null +++ b/data/output/005ed10e2fa927c69aa9d881c51ccca9ce2c789c.json @@ -0,0 +1,294 @@ +{ + "file_name": "005ed10e2fa927c69aa9d881c51ccca9ce2c789c.txt", + "total_words": 432, + "top_n_words": [ + { + "word": "the", + "count": 33 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "of", + "count": 14 + }, + { + 
"word": "a", + "count": 12 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "hotel", + "count": 9 + }, + { + "word": "were", + "count": 9 + }, + { + "word": "al", + "count": 8 + }, + { + "word": "said", + "count": 8 + }, + { + "word": "and", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "He died in a U.", + "length": 15 + }, + { + "text": "officials told CNN.", + "length": 19 + }, + { + "text": "hospital this month.", + "length": 20 + }, + { + "text": "embassies in Africa.", + "length": 20 + }, + { + "text": "He was captured by U.", + "length": 21 + }, + { + "text": "special forces in Libya.", + "length": 24 + }, + { + "text": "Five Libyans were killed.", + "length": 25 + }, + { + "text": "Guests were evacuated to safety, however.", + "length": 41 + }, + { + "text": "The gunmen then shot their way into the hotel.", + "length": 46 + }, + { + "text": "citizen, but would provide no further information.", + "length": 50 + }, + { + "text": "Al-Naas said it appears the attackers were Libyans.", + "length": 51 + }, + { + "text": "A State Department official confirmed the death of a U.", + "length": 55 + }, + { + "text": "The FBI is expected to open an investigation into the incident, two U.", + "length": 70 + }, + { + "text": "CNN's Mohammed Tawfeeq and Stephanie Halasz contributed to this report.", + "length": 71 + }, + { + "text": "A French citizen was among those killed, according to the French Foreign Ministry.", + "length": 82 + }, + { + "text": "Al-Libi was an alleged al Qaeda operative accused of involvement in the bombing of U.", + "length": 85 + }, + { + "text": "The attack began when militants detonated a car bomb in the parking lot of the hotel.", + "length": 85 + }, + { + "text": "The five-star hotel is popular among government officials, some of 
whom reside there.", + "length": 85 + }, + { + "text": "Their naming convention indicates that the men were of Tunisian and Sudanese origin, respectively.", + "length": 98 + }, + { + "text": "People were warned to stay away, the witness said, adding that there had been exchanges of gunfire.", + "length": 99 + }, + { + "text": "An online group that supports ISIS said the attack was carried out in the name of Abu Anas al-Libi.", + "length": 99 + }, + { + "text": "A witness to the events told CNN that all roads leading to the Corinthia Hotel had been sealed by security forces.", + "length": 114 + }, + { + "text": "A spokesman for the Corinthia Group in Malta told CNN there had not been a hostage situation in the hotel, as some reports suggested.", + "length": 133 + }, + { + "text": "Five foreigners -- one American, one French citizen, and three people from Tajikistan -- were killed in the attack, Essam al-Naas said.", + "length": 135 + }, + { + "text": "\"We are trying to take possession of the hotel back to assess the damage,\" he said, but Libyan security forces were not yet allowing that.", + "length": 138 + }, + { + "text": "Al-Naas earlier said at least two Libyan security personnel had been killed in the attack and that three gunmen were holed up in the hotel.", + "length": 139 + }, + { + "text": "The Libyan branch of ISIS claimed responsibility for the attacks, and released photos of the two gunmen, it said had carried out the attacks as Abu Ibraheem Al-Tunsi and Abu Sulaiman Al-Sudani.", + "length": 193 + }, + { + "text": "(CNN)At least 10 people and two attackers were killed in Tuesday's attack against the luxurious Corinthia Hotel in Tripoli, Libya, a spokesman for a security division of the Ministry of Interior in Tripoli said.", + "length": 211 + }, + { + "text": "American contractor David Berry was among the people killed in a terrorist attack at the Corinthia Hotel in Tripoli, Libya, on Tuesday, according to Cliff Taylor, chief executive officer of 
Crucible, a security firm where Berry was working.", + "length": 240 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8138910531997681 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:27.128711666Z", + "first_section_created": "2025-12-23T09:35:27.12908978Z", + "last_section_published": "2025-12-23T09:35:27.129271187Z", + "all_results_received": "2025-12-23T09:35:27.190564533Z", + "output_generated": "2025-12-23T09:35:27.190714638Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:27.12908978Z", + "publish_time": "2025-12-23T09:35:27.129271187Z", + "first_worker_start": "2025-12-23T09:35:27.129735505Z", + "last_worker_end": "2025-12-23T09:35:27.189665Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:27.129684403Z", + "start_time": "2025-12-23T09:35:27.129749506Z", + "end_time": "2025-12-23T09:35:27.129796707Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:27.129945Z", + "start_time": "2025-12-23T09:35:27.130101Z", + "end_time": "2025-12-23T09:35:27.189665Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:27.129685903Z", + "start_time": "2025-12-23T09:35:27.129760006Z", + "end_time": "2025-12-23T09:35:27.129825208Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:27.129665402Z", + "start_time": "2025-12-23T09:35:27.129735505Z", + "end_time": "2025-12-23T09:35:27.129772306Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + 
"sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2502, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/005edf7d0be43c1bd9ca8f53e417606c2a059369.json b/data/output/005edf7d0be43c1bd9ca8f53e417606c2a059369.json new file mode 100644 index 0000000..badca6e --- /dev/null +++ b/data/output/005edf7d0be43c1bd9ca8f53e417606c2a059369.json @@ -0,0 +1,222 @@ +{ + "file_name": "005edf7d0be43c1bd9ca8f53e417606c2a059369.txt", + "total_words": 366, + "top_n_words": [ + { + "word": "the", + "count": 23 + }, + { + "word": "of", + "count": 13 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "ming", + "count": 8 + }, + { + "word": "in", + "count": 7 + }, + { + "word": "and", + "count": 6 + }, + { + "word": "star", + "count": 6 + }, + { + "word": "a", + "count": 5 + }, + { + "word": "golf", + "count": 5 + }, + { + "word": "is", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "Great dude 
and a helluva miniature golfer.", + "length": 42 + }, + { + "text": "Golf legend Greg Norman (R) takes a golf clinic with the China National Team .", + "length": 78 + }, + { + "text": "Captain America star Chris Evans is shown to the red carpet by golf buggy in Haikou .", + "length": 85 + }, + { + "text": "Screen siren Nicole Kidman is escorted to the opening ceremony while holding her own likeness .", + "length": 95 + }, + { + "text": "Stunning actress Jessica Alba was one of many celebrities turning out for the star-studded event .", + "length": 98 + }, + { + "text": "Former Holland international Clarence Seedorf was one of several sports stars at the pro-am event .", + "length": 99 + }, + { + "text": "' VIDEO Scroll down to watch The cast of Hercules got a little weird during some recent interviews .", + "length": 100 + }, + { + "text": "Actor Morgan Freeman (L) was also having size issues after playing with Yao Ming at Mission Hills .", + "length": 100 + }, + { + "text": "Olympic gold medalist in figure skating Evgeni Plushenko watches on ahead of the start of the tournament .", + "length": 106 + }, + { + "text": "Johnson tweeted a picture of his mini-me moment accompanied by the caption: '7'9 Sun Ming Ming - one of the tallest players ever.", + "length": 129 + }, + { + "text": "It's not often that 6ft 5in Dwayne Johnson, aka 'The Rock', is made to feel small - but the action movie star was cut down to size on his visit to China.", + "length": 153 + }, + { + "text": "Meanwhile, more celebrities from the worlds of sport and screen were arriving in China to take part in the Mission Hills World Celebrity pro-am golf tournament near Haikou.", + "length": 172 + }, + { + "text": "Among the sportsmen on show will be former Holland and Real Madrid midfielder Clarence Seedorf, ex-NBA basketball star Yao Ming and Olympic gold medalist figure skater Evgeni Plushenko.", + "length": 186 + }, + { + "text": "The event sees the likes of Morgan Freeman, Nicole Kidman and 
Jessica Alba get in the swing of things while partnering professional players including Ian Poulter, Justin Rose and Greg Norman.", + "length": 191 + }, + { + "text": "The Hollywood actor travelled east to promote his film Hercules, but found time to play a round of miniature golf with the most unlikely of opponents - 7ft 9in basketball player Sun Ming Ming.", + "length": 195 + }, + { + "text": "Movie star Dwayne Johnson is dwarfed by basketball player Sun Ming Ming (R) The Beijing Ducks star, who wears size 20 shoes, is no stranger to celebrity, having appeared in Rush Hour 3 as 'Kung Fu Giant' as well as being part of the Guiness World Record tallest basketball lineup in the world.", + "length": 296 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.3985098600387573 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:27.630073351Z", + "first_section_created": "2025-12-23T09:35:27.630428964Z", + "last_section_published": "2025-12-23T09:35:27.630601971Z", + "all_results_received": "2025-12-23T09:35:27.703585464Z", + "output_generated": "2025-12-23T09:35:27.703733369Z", + "total_processing_time_ms": 73, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 72, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:27.630428964Z", + "publish_time": "2025-12-23T09:35:27.630601971Z", + "first_worker_start": "2025-12-23T09:35:27.631175693Z", + "last_worker_end": "2025-12-23T09:35:27.701381Z", + "total_journey_time_ms": 70, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:27.631131491Z", + "start_time": "2025-12-23T09:35:27.631176593Z", + "end_time": "2025-12-23T09:35:27.631209694Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:27.631395Z", + "start_time": "2025-12-23T09:35:27.631535Z", + 
"end_time": "2025-12-23T09:35:27.701381Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 69 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:27.631127191Z", + "start_time": "2025-12-23T09:35:27.631177493Z", + "end_time": "2025-12-23T09:35:27.631226595Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:27.631128191Z", + "start_time": "2025-12-23T09:35:27.631175693Z", + "end_time": "2025-12-23T09:35:27.631199894Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 69, + "min_processing_ms": 69, + "max_processing_ms": 69, + "avg_processing_ms": 69, + "median_processing_ms": 69, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2139, + "slowest_section_id": 0, + "slowest_section_time_ms": 70 + } +} diff --git a/data/output/005f499d34a2306efa16fde69bd8798c002219ce.json b/data/output/005f499d34a2306efa16fde69bd8798c002219ce.json new file mode 100644 index 0000000..8ca72bd --- 
/dev/null +++ b/data/output/005f499d34a2306efa16fde69bd8798c002219ce.json @@ -0,0 +1,274 @@ +{ + "file_name": "005f499d34a2306efa16fde69bd8798c002219ce.txt", + "total_words": 529, + "top_n_words": [ + { + "word": "the", + "count": 38 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "in", + "count": 12 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "she", + "count": 10 + }, + { + "word": "oberst", + "count": 9 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "for", + "count": 8 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "was", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Lucy Crossley .", + "length": 15 + }, + { + "text": "04:20 EST, 20 February 2014 .", + "length": 29 + }, + { + "text": "06:32 EST, 20 February 2014 .", + "length": 29 + }, + { + "text": "They have since been removed .", + "length": 30 + }, + { + "text": "com's It Happened to Me series.", + "length": 31 + }, + { + "text": "These posts have since been deleted from the site.", + "length": 50 + }, + { + "text": "Sorry we are unable to accept comments for legal reasons.", + "length": 57 + }, + { + "text": "Both Mr Oberst's attorney and Ms Faircloth were approached for comment.", + "length": 71 + }, + { + "text": "According to Mr Oberst's lawsuit, Ms Faircloth posted accusations on website XOJane.", + "length": 84 + }, + { + "text": "She says she posted his name a decade later to help others and at the suggestion of her husband.", + "length": 96 + }, + { + "text": "Accusation: The comments accusing Mr Obsert were written underneath an article about domestic violence XOJane.", + "length": 110 + }, + { + "text": "He has stongly denied Ms Faircloth's allegations and is seeking monetary damages, attorney fees and other costs.", + "length": 112 + }, + { + "text": "Mr Oberst, 
who lives in Omaha, Nebraska, is best known for his work with Bright Eyes, including song First Day Of My Life.", + "length": 122 + }, + { + "text": "She also accused Mr Oberst of punching her in the face, according to the legal papers, which add that she was 16 at the time.", + "length": 125 + }, + { + "text": "Legal action: Bright Eyes singer Conor Oberst is suing a woman for $1million after she accused him of raping her when she was 16 .", + "length": 130 + }, + { + "text": "The three comments were written underneath an article about domestic violence for website's It Happened to Me series, and were later shared on other blogs.", + "length": 155 + }, + { + "text": "Damage: The musician says the media coverage that resulted from the three posts in the comments section of a blog has damaged his career, especially in New York .", + "length": 162 + }, + { + "text": "According to the lawsuit, Ms Faircloth said in the posts that she never reported the assault because of the way her own family and friends reacted to the accusations.", + "length": 166 + }, + { + "text": "The singer of indie rock band Bright Eyes is suing a woman for $1million after she accused him of raping her after a concert when she was 16 in a series of online posts.", + "length": 169 + }, + { + "text": "Singer: Conor Oberst , pictured left in 2003 when Ms Faircloth claimed the assault took place, is best known for his work with Bright Eyes, including First Day Of My Life .", + "length": 172 + }, + { + "text": "com that the singer songwriter raped her a decade ago in North Carolina after his brother, who was her English teacher at school, introduced the two at one of the singer's concerts.", + "length": 181 + }, + { + "text": "Conor Oberst filed the lawsuit yesterday claiming he was libelled by the woman, who he has identified as Janie Faircloth, who claimed she was sexually assaulted by the singer-songwriter at a 2003 concert.", + "length": 204 + }, + { + "text": "He says he was in the company of 
his brother, bandmates or then-girlfriend at the time the claims were made, and the suit also says the woman has made positive social media comments about the guitarist over the last 10 years.", + "length": 225 + }, + { + "text": "An attorney for Oberst filed the civil lawsuit in a Manhattan federal court, with the legal documents claiming Ms Faircloth was a liar who made 'despicable, false, outrageous, and defamatory statements' about Oberst last December.", + "length": 230 + }, + { + "text": "The 33-year-old musician says the media coverage that resulted from the three posts in the comments section of a blog has damaged his career, especially in New York where most of the major music publishing houses have their headquarters.", + "length": 237 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6394383311271667 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:28.131377233Z", + "first_section_created": "2025-12-23T09:35:28.133502015Z", + "last_section_published": "2025-12-23T09:35:28.133699022Z", + "all_results_received": "2025-12-23T09:35:28.206687415Z", + "output_generated": "2025-12-23T09:35:28.206865522Z", + "total_processing_time_ms": 75, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 72, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:28.133502015Z", + "publish_time": "2025-12-23T09:35:28.133699022Z", + "first_worker_start": "2025-12-23T09:35:28.13416684Z", + "last_worker_end": "2025-12-23T09:35:28.205851Z", + "total_journey_time_ms": 72, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:28.134141639Z", + "start_time": "2025-12-23T09:35:28.134207042Z", + "end_time": "2025-12-23T09:35:28.134273344Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:28.134396Z", + 
"start_time": "2025-12-23T09:35:28.134539Z", + "end_time": "2025-12-23T09:35:28.205851Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 71 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:28.134137839Z", + "start_time": "2025-12-23T09:35:28.134205241Z", + "end_time": "2025-12-23T09:35:28.134285345Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:28.134113638Z", + "start_time": "2025-12-23T09:35:28.13416684Z", + "end_time": "2025-12-23T09:35:28.134197541Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 71, + "min_processing_ms": 71, + "max_processing_ms": 71, + "avg_processing_ms": 71, + "median_processing_ms": 71, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3047, + "slowest_section_id": 0, + "slowest_section_time_ms": 72 + } +} diff --git a/data/output/005f860840460cccf00a80c7cacf6e684a796e30.json b/data/output/005f860840460cccf00a80c7cacf6e684a796e30.json new file 
mode 100644 index 0000000..3c3c9f4 --- /dev/null +++ b/data/output/005f860840460cccf00a80c7cacf6e684a796e30.json @@ -0,0 +1,218 @@ +{ + "file_name": "005f860840460cccf00a80c7cacf6e684a796e30.txt", + "total_words": 259, + "top_n_words": [ + { + "word": "the", + "count": 17 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "in", + "count": 7 + }, + { + "word": "and", + "count": 6 + }, + { + "word": "a", + "count": 5 + }, + { + "word": "after", + "count": 5 + }, + { + "word": "said", + "count": 5 + }, + { + "word": "tourists", + "count": 5 + }, + { + "word": "victoria", + "count": 5 + }, + { + "word": "we", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Leesa Smith For Daily Mail Australia .", + "length": 38 + }, + { + "text": "'We then attended the scene and rescued the victims.", + "length": 52 + }, + { + "text": "The tourists were said to be on their way to Hwange National Park en-route to Mana Pools.", + "length": 89 + }, + { + "text": "However, the ABC has reported that three Australians were critically injured in the crash.", + "length": 90 + }, + { + "text": "Local villagers said the plane turned back to the airport around 11am after an engine failure.", + "length": 94 + }, + { + "text": "At least two Australian tourists have been seriously injured in a light plane crash in Zimbabwe.", + "length": 96 + }, + { + "text": "Local villagers said the plane had turned back to Victoria falls airport (pictured) after an engine failure .", + "length": 109 + }, + { + "text": "' Another villager said the tourists seemed to have sustained serious injuries because they were bleeding profusely.", + "length": 116 + }, + { + "text": "‘Soon after, we heard a loud bang near our fields and we ran and we saw it hitting trees as it crashed,’ one told The Herald.", + "length": 129 + }, + { + "text": "Two Australian tourists and their German pilot have been admitted to Victoria Falls District Hospital, Zimbabwe's The 
Herald newspaper says.", + "length": 140 + }, + { + "text": "The Cessna aircraft crashed into a paddock near Lupinyu Business Centre not long after taking off from Victoria Falls international airport on Sunday.", + "length": 150 + }, + { + "text": "'We managed to pull them out of the wreckage before the arrival of the ambulances that later rushed them to hospital in Victoria Falls,' a villager said.", + "length": 153 + }, + { + "text": "The Department of Foreign Affairs and Trade confirmed embassy staff in Harare are following up the reports and stand ready to provide consular assistance.", + "length": 154 + }, + { + "text": "At least two Australian tourists have been seriously injured in a Cessna aircraft crash (pictured) in Zimbabwe soon after take off at Victoria Fall airport .", + "length": 157 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8058028817176819 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:28.634468184Z", + "first_section_created": "2025-12-23T09:35:28.634823098Z", + "last_section_published": "2025-12-23T09:35:28.635000405Z", + "all_results_received": "2025-12-23T09:35:28.694607086Z", + "output_generated": "2025-12-23T09:35:28.69472119Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:28.634823098Z", + "publish_time": "2025-12-23T09:35:28.635000405Z", + "first_worker_start": "2025-12-23T09:35:28.635535225Z", + "last_worker_end": "2025-12-23T09:35:28.691617Z", + "total_journey_time_ms": 56, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:28.635527425Z", + "start_time": "2025-12-23T09:35:28.635582727Z", + "end_time": "2025-12-23T09:35:28.635614928Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + 
"receive_time": "2025-12-23T09:35:28.635798Z", + "start_time": "2025-12-23T09:35:28.635924Z", + "end_time": "2025-12-23T09:35:28.691617Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 55 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:28.635492723Z", + "start_time": "2025-12-23T09:35:28.635539825Z", + "end_time": "2025-12-23T09:35:28.635581327Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:28.635493023Z", + "start_time": "2025-12-23T09:35:28.635535225Z", + "end_time": "2025-12-23T09:35:28.635559226Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 55, + "min_processing_ms": 55, + "max_processing_ms": 55, + "avg_processing_ms": 55, + "median_processing_ms": 55, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1584, + "slowest_section_id": 0, + "slowest_section_time_ms": 56 + } +} diff --git a/data/output/005fa39d1665154d8ccf04654ad8309c5e2c2f1c.json 
b/data/output/005fa39d1665154d8ccf04654ad8309c5e2c2f1c.json new file mode 100644 index 0000000..7c76fb5 --- /dev/null +++ b/data/output/005fa39d1665154d8ccf04654ad8309c5e2c2f1c.json @@ -0,0 +1,488 @@ +{ + "file_name": "005fa39d1665154d8ccf04654ad8309c5e2c2f1c.txt", + "total_words": 1147, + "top_n_words": [ + { + "word": "the", + "count": 55 + }, + { + "word": "to", + "count": 32 + }, + { + "word": "a", + "count": 30 + }, + { + "word": "and", + "count": 26 + }, + { + "word": "in", + "count": 26 + }, + { + "word": "of", + "count": 22 + }, + { + "word": "it", + "count": 20 + }, + { + "word": "he", + "count": 19 + }, + { + "word": "his", + "count": 18 + }, + { + "word": "s", + "count": 18 + } + ], + "sorted_sentences": [ + { + "text": "Definitely.", + "length": 11 + }, + { + "text": "A true test.", + "length": 12 + }, + { + "text": "’ he added.", + "length": 13 + }, + { + "text": "Do you putt it?", + "length": 15 + }, + { + "text": "Do you chip it?", + "length": 15 + }, + { + "text": "and so can you”.", + "length": 18 + }, + { + "text": "It’s very difficult.", + "length": 22 + }, + { + "text": "But Rory will be in contention.", + "length": 31 + }, + { + "text": "‘Who could discount Bubba Watson?", + "length": 35 + }, + { + "text": "‘And then there’s Phil Mickelson.", + "length": 37 + }, + { + "text": "He’s hitting the ball extremely well.", + "length": 39 + }, + { + "text": "They all have a great opportunity again.", + "length": 40 + }, + { + "text": "'Rory, I think, is one of the favourites.", + "length": 41 + }, + { + "text": "‘It was wonderful,’ said Montgomerie.", + "length": 41 + }, + { + "text": "‘Pinehurst is very different this year.", + "length": 41 + }, + { + "text": "‘You never know what’s going to happen.", + "length": 43 + }, + { + "text": "‘The US Open is seen as the toughest to win.", + "length": 46 + }, + { + "text": "‘I guess it was a relief more than anything.", + "length": 46 + }, + { + "text": "It has been a long time coming and I’m thrilled.", + 
"length": 50 + }, + { + "text": "We’d all love for him to finally win the US Open.", + "length": 51 + }, + { + "text": "Six times runner-up in the one event he’s not won.", + "length": 52 + }, + { + "text": "If it’s at all breezy it could be near impossible.", + "length": 52 + }, + { + "text": "‘What is means is, you’ve got all sorts of options.", + "length": 55 + }, + { + "text": "‘But at the same time, it was great to win in America.", + "length": 56 + }, + { + "text": "He could just blast it round there and get away with it.", + "length": 56 + }, + { + "text": "Absent: Tiger Woods will miss the even with a back injury .", + "length": 59 + }, + { + "text": "And, obviously, Justin is up there with the favourites to win.", + "length": 62 + }, + { + "text": "‘This is a real, real challenge and the greens are firm too.", + "length": 62 + }, + { + "text": "‘There’s a number of guys potentially who could win it really.", + "length": 66 + }, + { + "text": "But Justin Rose is returning to form following his shoulder injury.", + "length": 67 + }, + { + "text": "‘Well, I like the look of Stephen Gallacher right now,’ he said.", + "length": 68 + }, + { + "text": "'Who can say he’s going to win because golf is one of these things.", + "length": 69 + }, + { + "text": "And Montgomerie said: ‘Justin’s chances of retaining are very high.", + "length": 71 + }, + { + "text": "Throwback: He was the first Englishman to do so since Tony Jacklin in 1970 .", + "length": 76 + }, + { + "text": "’ So, what about the chances of a Scottish success in the States on Sunday?", + "length": 77 + }, + { + "text": "VIDEO Scroll down for Rory McIlroy practising his swing ahead of the US Open .", + "length": 78 + }, + { + "text": "’ Plug: Montgomerie poses at Gleneagles at a Johnnie Walker publicity event .", + "length": 79 + }, + { + "text": "‘So, on all of the greens, you may run off and there’s no rough to stop it.", + "length": 79 + }, + { + "text": "They’ve cut back all of the rough - in fact, 
there’s no rough at all really.", + "length": 80 + }, + { + "text": "‘It would be great if he gets in the Ryder Cup team at Glenagles for Scotland’s sake.", + "length": 89 + }, + { + "text": "’ Pointer: Rose points to the sky after his final putt of his US Ope triumph last year .", + "length": 90 + }, + { + "text": "playing a course that could suit him, especially with the short game wizardry that he has.", + "length": 90 + }, + { + "text": "If you lead going into the last round and shoot a 65 you’d be unlucky not to lose really.", + "length": 91 + }, + { + "text": "Perhaps Gallacher will take inspiration from Montgomerie in his pursuit of a first ever major.", + "length": 94 + }, + { + "text": "Haunted: As is Phil Mickelson, who has come second six times in the event without ever winning it .", + "length": 99 + }, + { + "text": "Victors: Montgomerie, McIlroy and their Europe team-mates celebrating their Ryder Cup win in 2010 .", + "length": 99 + }, + { + "text": "‘Ben Crenshaw and his team have been asked by the USGA to put Pinehurst back to its original state.", + "length": 101 + }, + { + "text": "Balls: The Northern Irishman smiles on the practice range as he gears up for his tilt at the trophy .", + "length": 101 + }, + { + "text": "‘Without Tiger [Woods] there, Rory McIlory is the favourite and you’d expect him to be right there.", + "length": 103 + }, + { + "text": "’ Diamond in the rough: Two-times Masters champion Bubba Watson is a contender for his first US Open .", + "length": 104 + }, + { + "text": "Chillaxed: Colin Montgomerie believes that McIlroy can storm the US open in the absence of Tiger Woods .", + "length": 104 + }, + { + "text": "’ Swinger: McIlroy hits his tee shot on the seventh hole during a practice round at Pinehurst on Tuesday .", + "length": 108 + }, + { + "text": "Instead of stopping short in the rough to give you a flop shot, the ball will now run 30 yards extra down a slope.", + "length": 114 + }, + { + "text": "Prepared: Rory McIlroy 
talks with his dad Gerry at Pinehurst as he builds up to the US Open starting on Thursday .", + "length": 114 + }, + { + "text": "‘Pinehurst is a ball strikers course and he [McIlroy] being the best ball striker in the world has to start as favourite.", + "length": 123 + }, + { + "text": "‘I think it’s helped Rory, it helped Graeme McDowell and helped Justin Rose in winning these major championships since.", + "length": 123 + }, + { + "text": "22 years I’ve been playing in America, playing in a 130-odd tournaments, so it was nice to finally win a stroke play event!", + "length": 125 + }, + { + "text": "Harrington won back-to-back Open championships in 2007 and 2008 before landing the PGA Championship in the same calendar year.", + "length": 126 + }, + { + "text": "Colin Montgomerie is tipping bookies’ favourite Rory McIlroy to take advantage of Tiger Woods’ absence and win the US Open.", + "length": 127 + }, + { + "text": "And Montgomerie believes the Northern Ireland native will be in contention for a third major success come Sunday’s final round.", + "length": 129 + }, + { + "text": "‘I think that Padraig Harrington gave those types of players belief that, “Look, I can do this lads, I’ve won three out of four majors...", + "length": 143 + }, + { + "text": "‘Going back to a championship that you’ve won, albeit at a different venue, as a defending champion you feel you’ve got a chance to do it again.", + "length": 150 + }, + { + "text": "And Montgomerie, who was McIlroy’s Ryder Cup-winning captain at Celtic Manor four years ago, has warned Pinehurst will be a difficult beast to tame.", + "length": 150 + }, + { + "text": "Justin Rose ended the 43-year hoodoo of an English champion when he became the first since Tony Jacklin in 1970 to win the US Open at Merion last year.", + "length": 151 + }, + { + "text": "Montgomerie, who will be summarising the play for Sky Sports in North Carolina this week, is refusing to rule out the heavyweight American stars though.", + 
"length": 152 + }, + { + "text": "Montgomerie, 50, reckons Rose has every chance of defending his title in North Carolina and claims it’s all thanks to the benchmark set by Padraig Harrington.", + "length": 160 + }, + { + "text": "Montgomerie, after all, was the last Scot to return home victorious from across the Atlantic after his stunning Senior PGA Championship victory at Harbor Shores last month.", + "length": 172 + }, + { + "text": "The 25-year-old showed his strength and class to win the BMW PGA Championship at Wentworth last month and he shot six-under par to finish tied-15th at the recent Memorial Championship.", + "length": 184 + }, + { + "text": "’ he said, speaking exclusively to Sportsmail at Gleneagles, venue for this year’s Ryder Cup extravaganza, where he launched the Johnnie Walker Blue Label limited edition whiskey on Monday.", + "length": 193 + }, + { + "text": "’ McIlory has admitted he will be making some equipment changes to his bag this week, opting to put a three-iron in and go with a three-wedge system comprising of a pitch wedge, 54 and 59 degree clubs.", + "length": 203 + }, + { + "text": "The world No 6 celebrated his first-ever major triumph back in 2011 when he won the US Open at Congressional and his form of late, despite his high-profile break-up with tennis ace Caroline Wozniacki, has improved considerably.", + "length": 227 + }, + { + "text": "McIlroy will begin his quest for glory on Thursday and Friday alongside two fellow former US Open champions in Webb Simpson and good friend Graeme McDowell, completing a field buoyed by the absence of former world No 1 Woods who remains sidelined with a back problem.", + "length": 267 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.5239270925521851 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:29.136169782Z", + "first_section_created": "2025-12-23T09:35:29.136814907Z", + "last_section_published": "2025-12-23T09:35:29.13715172Z", 
+ "all_results_received": "2025-12-23T09:35:29.229238243Z", + "output_generated": "2025-12-23T09:35:29.229493053Z", + "total_processing_time_ms": 93, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 92, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:29.136814907Z", + "publish_time": "2025-12-23T09:35:29.137015814Z", + "first_worker_start": "2025-12-23T09:35:29.137597537Z", + "last_worker_end": "2025-12-23T09:35:29.226579Z", + "total_journey_time_ms": 89, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:29.137542535Z", + "start_time": "2025-12-23T09:35:29.137597537Z", + "end_time": "2025-12-23T09:35:29.137706041Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:29.137866Z", + "start_time": "2025-12-23T09:35:29.138013Z", + "end_time": "2025-12-23T09:35:29.226579Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 88 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:29.137552235Z", + "start_time": "2025-12-23T09:35:29.137610337Z", + "end_time": "2025-12-23T09:35:29.137713041Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:29.137544335Z", + "start_time": "2025-12-23T09:35:29.137605737Z", + "end_time": "2025-12-23T09:35:29.137648739Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:29.137056416Z", + "publish_time": "2025-12-23T09:35:29.13715172Z", + "first_worker_start": "2025-12-23T09:35:29.137639638Z", + "last_worker_end": "2025-12-23T09:35:29.228678Z", + "total_journey_time_ms": 91, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:29.137602337Z", + "start_time": 
"2025-12-23T09:35:29.137639638Z", + "end_time": "2025-12-23T09:35:29.13767474Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:29.137913Z", + "start_time": "2025-12-23T09:35:29.138078Z", + "end_time": "2025-12-23T09:35:29.228678Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 90 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:29.137640038Z", + "start_time": "2025-12-23T09:35:29.13768044Z", + "end_time": "2025-12-23T09:35:29.137722942Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:29.137604737Z", + "start_time": "2025-12-23T09:35:29.137654839Z", + "end_time": "2025-12-23T09:35:29.13767184Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 178, + "min_processing_ms": 88, + "max_processing_ms": 90, + "avg_processing_ms": 89, + "median_processing_ms": 90, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + 
"average_section_size": 3199, + "slowest_section_id": 1, + "slowest_section_time_ms": 91 + } +} diff --git a/data/output/005fd0fe0dda2d0e94cc8279b91cef2c0fb4a704.json b/data/output/005fd0fe0dda2d0e94cc8279b91cef2c0fb4a704.json new file mode 100644 index 0000000..eeef6d6 --- /dev/null +++ b/data/output/005fd0fe0dda2d0e94cc8279b91cef2c0fb4a704.json @@ -0,0 +1,532 @@ +{ + "file_name": "005fd0fe0dda2d0e94cc8279b91cef2c0fb4a704.txt", + "total_words": 1587, + "top_n_words": [ + { + "word": "the", + "count": 84 + }, + { + "word": "to", + "count": 64 + }, + { + "word": "and", + "count": 37 + }, + { + "word": "a", + "count": 36 + }, + { + "word": "hinckley", + "count": 36 + }, + { + "word": "in", + "count": 36 + }, + { + "word": "she", + "count": 35 + }, + { + "word": "said", + "count": 34 + }, + { + "word": "of", + "count": 27 + }, + { + "word": "he", + "count": 26 + } + ], + "sorted_sentences": [ + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "Elizabeths.", + "length": 11 + }, + { + "text": "Elizabeths.", + "length": 11 + }, + { + "text": "Elizabeths.", + "length": 11 + }, + { + "text": "in the Williamsburg area.", + "length": 25 + }, + { + "text": "Former President George W.", + "length": 26 + }, + { + "text": "Sims said that is the case.", + "length": 27 + }, + { + "text": "Under the proposal made by St.", + "length": 30 + }, + { + "text": "Kennedy was assassinated in 1963.", + "length": 33 + }, + { + "text": "\"That's a concern to me,\" Sims said quietly.", + "length": 44 + }, + { + "text": "CNN's James Polk contributed to this report.", + "length": 44 + }, + { + "text": "She said, \"He doesn't bother anybody\" either.", + "length": 45 + }, + { + "text": "She said he gravitated to books on music and art.", + "length": 49 + }, + { + "text": "Since his conviction, Hinckley has been living in St.", + "length": 53 + }, + { + "text": "Currently, he spends 10 days a month in Williamsburg.", + "length": 53 + }, + { + "text": 
"Hinckley at one time told some of his caregivers at St.", + "length": 55 + }, + { + "text": "Elizabeths, a government mental hospital in Washington.", + "length": 55 + }, + { + "text": "Hinckley later ended the engagement, doctors have said.", + "length": 55 + }, + { + "text": "The group was surprised to see Hinckley there, she said.", + "length": 56 + }, + { + "text": "She said agents are always waiting in a vehicle and follow along.", + "length": 65 + }, + { + "text": "Elizabeths that he was engaged to CB, who had been a patient there.", + "length": 67 + }, + { + "text": "\"In general, the people in town have been very tolerant,\" she said.", + "length": 67 + }, + { + "text": "Sims said she wanted to set the record straight about what happened.", + "length": 68 + }, + { + "text": "\"He's not bothered by people, he's not pointed out,\" she told the court.", + "length": 72 + }, + { + "text": "Washington (CNN) -- The sister of presidential assailant John Hinckley Jr.", + "length": 74 + }, + { + "text": "Bush and his wife, Laura, moved to Dallas after they left the White House.", + "length": 74 + }, + { + "text": "Sims replied the family did and was happy to cooperate with the Secret Service.", + "length": 79 + }, + { + "text": "Sims said she has no worries that Hinckley would be a risk to others in Dallas.", + "length": 79 + }, + { + "text": "But at the end of a March 2011 visit, the Secret Service was not waiting, she said.", + "length": 83 + }, + { + "text": "She said she is aware her brother still sees CB sometimes when the woman visits St.", + "length": 83 + }, + { + "text": "Sims was asked if she ever noticed Secret Service agents keeping an eye on Hinckley.", + "length": 84 + }, + { + "text": "She was asked many questions about how the 56-year-old fits into the Virginia community.", + "length": 88 + }, + { + "text": "\"The subject was not observed picking up and looking at specific books,\" the report says.", + "length": 89 + }, + { + "text": "But she said 
all the members were in their mid-70s and her brother would not have fit in.", + "length": 89 + }, + { + "text": "He would also be expected to continue his volunteer activities at Eastern State Hospital.", + "length": 89 + }, + { + "text": "\" Sims often spends time in Williamsburg when Hinckley is there and drives him back to St.", + "length": 90 + }, + { + "text": "But in recent years he's been allowed to visit his mother's home with increasing frequency.", + "length": 91 + }, + { + "text": "All of those men survived, though Brady was shot in the head and left permanently disabled.", + "length": 91 + }, + { + "text": "Elizabeths he would be granted two visits of 17 days each followed by six visits of 24 days.", + "length": 92 + }, + { + "text": "22-caliber revolvers, one of which he used to shoot Reagan and the others five months later.", + "length": 92 + }, + { + "text": "Hinckley's sister testified on the fourth day of a multiple-day hearing to discuss his future.", + "length": 94 + }, + { + "text": "The judge noted the report did not indicate that Hinckley read that book or any similar books.", + "length": 94 + }, + { + "text": "The report was filed with the court under seal on July 29 and was not made public until Tuesday.", + "length": 96 + }, + { + "text": "testified Tuesday that she has seen no sign that her brother represents a danger to himself or others.", + "length": 102 + }, + { + "text": "Sims said the agent asked the Hinckley family's location and requested that they wait for him to catch up.", + "length": 106 + }, + { + "text": "She and her brother believed members of the group knew they were coming, but that was not the case, she said.", + "length": 109 + }, + { + "text": "\" Hinckley's sister said she had accompanied him to bookstores and never saw him look at any books like that.", + "length": 109 + }, + { + "text": "But she said she does not think it would be a good option for him to move to the Dallas area, where she lives.", + "length": 110 + 
}, + { + "text": "Hinckley is experiencing a good mental status and that he does not present as [sic] a danger to himself or others.", + "length": 114 + }, + { + "text": "Sims said that when her brother goes out in the community he usually has no problems with people who recognize him.", + "length": 115 + }, + { + "text": "He flew to Dallas to stay that weekend at his sister's home while she and her husband were away at a football game.", + "length": 115 + }, + { + "text": "he feels comfortable there and has indicated he wants to stay there even when his elderly mother is no longer there.", + "length": 116 + }, + { + "text": "On that Monday, Hinckley told her he was going out to look for a job, but, unknown to her, he bought the guns instead.", + "length": 118 + }, + { + "text": "\" If so, the recommendation said, Hinckley would be \"conditionally released to reside permanently on convalescent leave.", + "length": 120 + }, + { + "text": "Hinckley would be expected at the outset to live with his widowed mother, who is turning 86 this week and is in good health.", + "length": 124 + }, + { + "text": "Under questioning, Sims acknowledged the family does not permit a woman identified in court only as CB to visit the Hinckley home.", + "length": 130 + }, + { + "text": "The doctors signing the report said such leave would be permitted only upon an assessment, at the end of the new visits, that \"Mr.", + "length": 130 + }, + { + "text": "She replied the only time she has noticed surveillance is when she drives Hinckley out of her mother's gated community and back to St.", + "length": 134 + }, + { + "text": "Only days before the purchase, three guns had been confiscated from Hinckley's luggage while he was trying to board a plane in Nashville.", + "length": 137 + }, + { + "text": "She said Hinckley and his family members find they are able to go to certain restaurants where people know who they are but are welcoming.", + "length": 138 + }, + { + "text": "She also said she 
thinks a move to Dallas would place Hinckley too far away from the medical experts who have cared for him for many years.", + "length": 139 + }, + { + "text": "Sims said her brother never told the family he was engaged and -- quite to the contrary -- has said he's not engaged and has no plans to marry the woman.", + "length": 153 + }, + { + "text": "\"President Bush lives not 10 minutes from me and I think it would be a concern,\" said Sims, apparently referring to concerns the Secret Service might have.", + "length": 155 + }, + { + "text": "According to Sims, about midway through the trip to Washington, Hinckley's brother, Scott, got a call on his cell phone from an agent who said he had been late.", + "length": 160 + }, + { + "text": "District Judge Paul Friedman asked if it is accurate she is worried about her brother's personal safety in Dallas, since that is the city where President John F.", + "length": 161 + }, + { + "text": "Previously the court heard testimony that Sims had taken her brother to a singles group meeting in Williamsburg a number of years ago, and he was asked to leave.", + "length": 161 + }, + { + "text": "The gun purchase was not mentioned in court Tuesday, and the government has not said it would be more dangerous for Hinckley to live in Dallas than anywhere else.", + "length": 162 + }, + { + "text": "But in response to questioning by a prosecutor, she said she wasn't aware Hinckley had failed to go to the movies as planned on two occasions, in July and in September.", + "length": 168 + }, + { + "text": "Hinckley posted bond, was released and flew to New York City, then spent the night in New Haven, Connecticut, where actress Jodie Foster was a college freshman student.", + "length": 168 + }, + { + "text": "\" He would be required to meet at least once a month with a psychiatrist who is already counseling him in Williamsburg and to continue his weekly visits to a therapist there.", + "length": 174 + }, + { + "text": "Last week, prosecutors 
said Hinckley had not told the truth about deviating from his approved itinerary to go to a movie while on his few hours of permitted unaccompanied time.", + "length": 176 + }, + { + "text": "However, the filing said, should Hinckley violate the terms of his convalescent leave, \"the hospital will return him to total inpatient care with due notification of the court.", + "length": 176 + }, + { + "text": "Hinckley was staying at the home of his sister and her husband in Dallas on October 13, 1980, when, according to testimony in his 1982 trial, he went to a pawn shop to buy two .", + "length": 177 + }, + { + "text": "\"One item of note is the subject stopped for a time and looked at the shelves in the American History area that contain several books about President Reagan and his attempted assassination.", + "length": 189 + }, + { + "text": "Prosecutors described CB as being psychologically unstable and asked if it was accurate Hinckley's mother does not allow her to visit for fear she would have a mental breakdown at the home.", + "length": 189 + }, + { + "text": "According to Sims, Hinckley likes working part time in the library of Eastern State Hospital and is comfortable with the psychiatrist and case manager/therapist he sees while in Williamsburg.", + "length": 191 + }, + { + "text": "Hinckley was found not guilty by reason of insanity in the 1981 shootings of President Ronald Reagan, press secretary James Brady, Secret Service agent Timothy McCarthy, and police officer Thomas Delahanty.", + "length": 206 + }, + { + "text": "Diane Sims, who said she loves her brother, said she supports a proposal that would expand his visits to their mother's home in Williamsburg, Virginia, and might eventually allow him to live there as a full-time outpatient.", + "length": 223 + }, + { + "text": "However, the hospital said should his \"mother not be available\" after Hinckley's release, his brother and sister had expressed interest \"in the housing options of independent 
apartments as well as Assisted Living Facilities...", + "length": 226 + }, + { + "text": "The hospital's recommendation for Hinckley's eventual release on convalescent leave asks the judge to allow Hinckley to live full time in Williamsburg \"at the discretion of the hospital\" once the eight longer visits have been completed successfully.", + "length": 249 + }, + { + "text": "According to a report by Secret Service agents who were watching Hinckley without his knowledge, on one outing last July, Hinckley did not attend a movie and instead went to a bookstore where he passed by an aisle of books that included an account of the day Reagan was shot.", + "length": 275 + }, + { + "text": "\" If the judge were to grant convalescent leave at the hospital's discretion at the end of the eight longer visits now requested, it would be unlikely to happen until at least the fall of 2012, since as the filing requires a minimum two-week interval between each of the new visits.", + "length": 282 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5056720674037933 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:29.637922582Z", + "first_section_created": "2025-12-23T09:35:29.63918373Z", + "last_section_published": "2025-12-23T09:35:29.639546944Z", + "all_results_received": "2025-12-23T09:35:29.740468606Z", + "output_generated": "2025-12-23T09:35:29.740721916Z", + "total_processing_time_ms": 102, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 100, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:29.63918373Z", + "publish_time": "2025-12-23T09:35:29.639404739Z", + "first_worker_start": "2025-12-23T09:35:29.63996396Z", + "last_worker_end": "2025-12-23T09:35:29.739584Z", + "total_journey_time_ms": 100, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:29.639900858Z", + "start_time": 
"2025-12-23T09:35:29.63996396Z", + "end_time": "2025-12-23T09:35:29.640048263Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:29.640212Z", + "start_time": "2025-12-23T09:35:29.640475Z", + "end_time": "2025-12-23T09:35:29.739584Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 99 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:29.63995936Z", + "start_time": "2025-12-23T09:35:29.640049263Z", + "end_time": "2025-12-23T09:35:29.640166468Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:29.639914258Z", + "start_time": "2025-12-23T09:35:29.639994661Z", + "end_time": "2025-12-23T09:35:29.640033063Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:29.63943374Z", + "publish_time": "2025-12-23T09:35:29.639546944Z", + "first_worker_start": "2025-12-23T09:35:29.640066464Z", + "last_worker_end": "2025-12-23T09:35:29.713438Z", + "total_journey_time_ms": 74, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:29.640244071Z", + "start_time": "2025-12-23T09:35:29.640365375Z", + "end_time": "2025-12-23T09:35:29.640466779Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:29.640328Z", + "start_time": "2025-12-23T09:35:29.640456Z", + "end_time": "2025-12-23T09:35:29.713438Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:29.639985261Z", + "start_time": "2025-12-23T09:35:29.640180368Z", + "end_time": "2025-12-23T09:35:29.640298073Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + 
"receive_time": "2025-12-23T09:35:29.640014162Z", + "start_time": "2025-12-23T09:35:29.640066464Z", + "end_time": "2025-12-23T09:35:29.640104965Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 171, + "min_processing_ms": 72, + "max_processing_ms": 99, + "avg_processing_ms": 85, + "median_processing_ms": 99, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4474, + "slowest_section_id": 0, + "slowest_section_time_ms": 100 + } +} diff --git a/data/output/00602b38932b8d891197310395816a464e4d1793.json b/data/output/00602b38932b8d891197310395816a464e4d1793.json new file mode 100644 index 0000000..f7492f0 --- /dev/null +++ b/data/output/00602b38932b8d891197310395816a464e4d1793.json @@ -0,0 +1,508 @@ +{ + "file_name": "00602b38932b8d891197310395816a464e4d1793.txt", + "total_words": 1067, + "top_n_words": [ + { + "word": "the", + "count": 68 + }, + { + "word": "and", + "count": 31 + }, + { + "word": "in", + "count": 30 + }, + { + "word": "of", + "count": 28 + }, + { + "word": "a", + "count": 21 + }, + { + 
"word": "to", + "count": 19 + }, + { + "word": "river", + "count": 16 + }, + { + "word": "thames", + "count": 14 + }, + { + "word": "have", + "count": 12 + }, + { + "word": "home", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "'(We .", + "length": 6 + }, + { + "text": "A £3.", + "length": 6 + }, + { + "text": "south..", + "length": 7 + }, + { + "text": "Laura .", + "length": 7 + }, + { + "text": "Rising .", + "length": 8 + }, + { + "text": "Ireland.", + "length": 8 + }, + { + "text": "Flooding .", + "length": 10 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Tara Brady .", + "length": 12 + }, + { + "text": "Communities .", + "length": 13 + }, + { + "text": "Fingers crossed.", + "length": 16 + }, + { + "text": "ground in Scotland.", + "length": 19 + }, + { + "text": "'We prepared for it.", + "length": 20 + }, + { + "text": "14:49 EST, 9 January 2014 .", + "length": 27 + }, + { + "text": "06:35 EST, 10 January 2014 .", + "length": 28 + }, + { + "text": "You will save so much stuff.", + "length": 28 + }, + { + "text": "' Wales and South-West England are to .", + "length": 39 + }, + { + "text": "with a high chance of snow in the North.", + "length": 40 + }, + { + "text": "This was the scene in Wargrave, Berkshire .", + "length": 43 + }, + { + "text": "But come Sunday, the rain will return in the .", + "length": 46 + }, + { + "text": "The home is now completely surrounded by water.", + "length": 47 + }, + { + "text": "Floods caused havoc along the River Thames today.", + "length": 49 + }, + { + "text": "'The water is still about 15\" below 1983 around here.", + "length": 53 + }, + { + "text": "MeteoGroup said: ‘There will be rain across a lot of .", + "length": 56 + }, + { + "text": "Of those, less than 10 were viable and affordable quotes.", + "length": 57 + }, + { + "text": "75 million home in Quarry 
Wood Road, Marlow, flooded today.", + "length": 59 + }, + { + "text": "'We are nowhere near as bad as some we are seeing on the news.", + "length": 62 + }, + { + "text": "flooding from groundwater in Dorset, Wiltshire and West Sussex.", + "length": 63 + }, + { + "text": "endure more rain tomorrow, which will return again on Sunday - .", + "length": 64 + }, + { + "text": "England and Wales today and also showers in Scotland and Northern .", + "length": 67 + }, + { + "text": "Dorset, the Parrett in Somerset and the Severn through the Midlands.", + "length": 68 + }, + { + "text": "rivers include the Hampshire Avon through Wiltshire, Hampshire and .", + "length": 68 + }, + { + "text": "continued on parts of the Somerset Levels, and there remained a risk of .", + "length": 73 + }, + { + "text": "will also have) snow across the hills in north England and reaching the .", + "length": 73 + }, + { + "text": "The home, built on stilts, was unaffected by the floods but totally marooned .", + "length": 78 + }, + { + "text": "He wrote: 'Thank you to all those who asked about our flooding but don't worry.", + "length": 79 + }, + { + "text": "However, judging by this aerial shot, the magician may have had to make a dash for it.", + "length": 86 + }, + { + "text": "Evacuate: The River Thames burst its banks flooding homes in Marlow, Buckinghamshire .", + "length": 86 + }, + { + "text": "People on the River Severn would love to have a barrier and flood relief scheme like ours.", + "length": 90 + }, + { + "text": "Homes in Marlow, Buckinghamshire, flooded today when the banks of the River Thames burst .", + "length": 90 + }, + { + "text": "Wargrave, in Berkshire, was devastated by water from the River Thames when its banks burst .", + "length": 92 + }, + { + "text": "Paul Daniels' home is right next to the banks of the River Thames at Wargrave in Berkshire .", + "length": 92 + }, + { + "text": "Gallons of floodwater cascaded into the TV funnyman's home which he shares with his 
partner .", + "length": 93 + }, + { + "text": "' Magician Paul Daniels and wife Debbie McGee at their home in Wargrave which has been flooded .", + "length": 96 + }, + { + "text": "A tractor pulls out a car in Runnymede which was flooded after the River Thames burst its banks .", + "length": 97 + }, + { + "text": "‘With that though we will have colder temperatures, dropping to below freezing on Saturday evening.", + "length": 101 + }, + { + "text": "' Celebrity Paul Daniels' home (circled) was flooded this week after the River Thames burst its banks .", + "length": 103 + }, + { + "text": "'The pressure has been greater for us but the River Thames is quite controlled compared to other rivers.", + "length": 104 + }, + { + "text": "' Gallons of floodwater cascaded into Paul Daniels' home which he shares with his partner Debbie McGee .", + "length": 104 + }, + { + "text": "This was the scene in Marlow, Buckinghamshire, after the River Thames burst its banks and flooded homes .", + "length": 105 + }, + { + "text": "TV magician and celebrity Paul Daniels' home was flooded this week after the River Thames burst its banks.", + "length": 106 + }, + { + "text": "'They can make that decision from their home before they bother stepping through the door,' said Mr Milsom.", + "length": 107 + }, + { + "text": "' A luxury home in Wargrave, Berkshire, which this afternoon was engulfed by floodwater from the River Thames.", + "length": 110 + }, + { + "text": "along the Thames throughout Surrey, Berkshire and Oxfordshire were warned today they were at risk of flooding.", + "length": 110 + }, + { + "text": "Estate agents have warned of the huge impact continued flooding of multi-million pound homes between Henley and Wargrave .", + "length": 122 + }, + { + "text": "The River Thames expands across the flood plains between Henley and Wargrave as the surge of water burst its banks today .", + "length": 122 + }, + { + "text": "The forecaster added: ‘It will all begin to clear throughout 
tomorrow before dry and sunny weather arrives at the weekend.", + "length": 124 + }, + { + "text": "Gallons of floodwater cascaded into the TV funnyman's home which he shares with his partner Debbie McGee in Wargrave, Berkshire.", + "length": 128 + }, + { + "text": "Earlier in the day he tweeted advising neighbours to 'get inflatable paddling pools downstairs and dump everything you can into them.", + "length": 133 + }, + { + "text": "Andrew Milsom, director of Andrew Milson Estate Agents, warned that flooding could pose ongoing problems for the area's housing market.", + "length": 135 + }, + { + "text": "'I personally would have to say that I would not in a million years buy a house that I know floods every year - it is a horror story every year.", + "length": 144 + }, + { + "text": "Interesting that in the last 10 years the Environment Agency have failed to control the waters, which were well predicted as going to come again.", + "length": 145 + }, + { + "text": "'If a property has flooded in the past and the owners have needed to make an insurance claim, it does cause concern to people looking to buy,' he said.", + "length": 151 + }, + { + "text": "'For some reason people buying a property next to the river are always surprised when the insurance company tells them it is on a flood plain,' he said.", + "length": 152 + }, + { + "text": "Houses in the so-called 'stockbroker belt' along the River Thames suffered as flood waters have spilled into basements, ground floors and gardens today.", + "length": 152 + }, + { + "text": "Wet: The rain will continue this weekend, as river levels continue to rise in counties including Wiltshire, Hampshire, Dorset, Somerset and the Midlands .", + "length": 154 + }, + { + "text": "Red alert: Flooding continued on parts of the Somerset Levels, and there remained a risk of flooding from groundwater in Dorset, Wiltshire and West Sussex .", + "length": 156 + }, + { + "text": "Estate agents have warned of the huge impact continued 
flooding of multi-million pound homes may have on some of the country's most expensive property markets.", + "length": 159 + }, + { + "text": "'Insurance companies are becoming increasingly difficult - we sold a riverside property in Temple over the summer and the buyer went to 96 insurance companoes.", + "length": 159 + }, + { + "text": "'This has been quite a bad year as we have had a lot of rain, but with most properties the garden will flood and it is inconvenient but not too much of a problem.", + "length": 162 + }, + { + "text": "Tim Peers, of Peers and Hilton estate agents in Shiplake, Berkshire, added: 'The only way you can really get insurance is by taking on the existing owners' insurance.", + "length": 166 + }, + { + "text": "' He added that potential buyers would rule out properties without viewing them, if their postcode was classed as in an area at risk of flooding, regardless of its individual siting.", + "length": 182 + }, + { + "text": "Exclusive addresses in Marlow and Cookham, Buckinghamshire, which count Heston Blumenthal, Sir Clive Woodward and Andrew Strauss among residents, were submerged leaving residents counting the immediate costs of insurance claims and repairing damage.", + "length": 249 + }, + { + "text": "Warning: Earlier in the day the magician tweeted advising neighbours to 'get inflatable paddling pools downstairs and dump everything you can into them' Peter Nicholls, director of Jacksons estate agents which is advertising the property, gave a more positive outlook.", + "length": 268 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5283461809158325 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:30.140331707Z", + "first_section_created": "2025-12-23T09:35:30.14067792Z", + "last_section_published": "2025-12-23T09:35:30.140995032Z", + "all_results_received": "2025-12-23T09:35:30.248464045Z", + "output_generated": "2025-12-23T09:35:30.248648552Z", + 
"total_processing_time_ms": 108, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 107, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:30.14067792Z", + "publish_time": "2025-12-23T09:35:30.140893328Z", + "first_worker_start": "2025-12-23T09:35:30.141413848Z", + "last_worker_end": "2025-12-23T09:35:30.247615Z", + "total_journey_time_ms": 106, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:30.141429349Z", + "start_time": "2025-12-23T09:35:30.141505752Z", + "end_time": "2025-12-23T09:35:30.141584855Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:30.141725Z", + "start_time": "2025-12-23T09:35:30.141886Z", + "end_time": "2025-12-23T09:35:30.247615Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 105 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:30.141317244Z", + "start_time": "2025-12-23T09:35:30.141413848Z", + "end_time": "2025-12-23T09:35:30.141511452Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:30.141585655Z", + "start_time": "2025-12-23T09:35:30.141653257Z", + "end_time": "2025-12-23T09:35:30.14171306Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:30.14094513Z", + "publish_time": "2025-12-23T09:35:30.140995032Z", + "first_worker_start": "2025-12-23T09:35:30.141605756Z", + "last_worker_end": "2025-12-23T09:35:30.232489Z", + "total_journey_time_ms": 91, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:30.141576654Z", + "start_time": "2025-12-23T09:35:30.141605756Z", + "end_time": "2025-12-23T09:35:30.141639657Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:30.141838Z", + "start_time": "2025-12-23T09:35:30.141972Z", + "end_time": "2025-12-23T09:35:30.232489Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 90 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:30.141586655Z", + "start_time": "2025-12-23T09:35:30.141623156Z", + "end_time": "2025-12-23T09:35:30.141665358Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:30.141587155Z", + "start_time": "2025-12-23T09:35:30.141637757Z", + "end_time": "2025-12-23T09:35:30.141655357Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 195, + "min_processing_ms": 90, + "max_processing_ms": 105, + "avg_processing_ms": 97, + "median_processing_ms": 105, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3222, + "slowest_section_id": 0, + "slowest_section_time_ms": 106 + } +} diff 
--git a/data/output/0060402b86ed6e5d251c0d3f49f573e02c1733ef.json b/data/output/0060402b86ed6e5d251c0d3f49f573e02c1733ef.json new file mode 100644 index 0000000..0126de9 --- /dev/null +++ b/data/output/0060402b86ed6e5d251c0d3f49f573e02c1733ef.json @@ -0,0 +1,282 @@ +{ + "file_name": "0060402b86ed6e5d251c0d3f49f573e02c1733ef.txt", + "total_words": 508, + "top_n_words": [ + { + "word": "the", + "count": 27 + }, + { + "word": "of", + "count": 23 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "planet", + "count": 11 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "as", + "count": 6 + }, + { + "word": "this", + "count": 6 + }, + { + "word": "been", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "2-040308.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Mark Prigg .", + "length": 12 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "9 in infrared light.", + "length": 20 + }, + { + "text": "06:37 EST, 14 November 2012 .", + "length": 29 + }, + { + "text": "12:36 EST, 14 November 2012 .", + "length": 29 + }, + { + "text": "It is 100 light years away from our solar system .", + "length": 50 + }, + { + "text": "'It’s like looking for a single needle in amongst thousands of haystacks.", + "length": 75 + }, + { + "text": "'Astronomers weren’t sure whether to categorise them as planets or as Brown Dwarfs.", + "length": 85 + }, + { + "text": "'Now we will be looking for them amongst an astronomical number of sources further afield.", + "length": 90 + }, + { + "text": "' ESO's Very Large Telescope (VLT) at Paranal in Chile, where many of the measurements were made .", + "length": 98 + }, + { + "text": "This information will in turn enable astronomers to better understand planets that do orbit stars.", + "length": 98 + }, + { + "text": 
"An artist's impression shows the free-floating planet CFBDSIR J214947, the 'loneliest planet' without a star.", + "length": 109 + }, + { + "text": "' This closeup of an image captured by the La Silla Observatory shows the free-floating planet CFBDSIR J214947.", + "length": 111 + }, + { + "text": "A ‘homeless planet’ which floats through space without orbiting a star had been discovered for the first time.", + "length": 114 + }, + { + "text": "'We observed hundreds of millions of stars and planets, but we only found one homeless planet in our neighbourhood.", + "length": 115 + }, + { + "text": "'Brown dwarfs are what we could call failed stars, as they never manage to initiate nuclear reactions in their centres.", + "length": 119 + }, + { + "text": "' The absence of a shining star in the area around this planet enabled the team to study its atmosphere in great detail.", + "length": 120 + }, + { + "text": "The planet is called CFBDSIR2149 and appears to be part of a group of very young stars known as the AB Doradus Moving Group.", + "length": 124 + }, + { + "text": "The planet is called CFBDSIR2149 and appears to be part of a group of very young stars known as the AB Doradus Moving Group.", + "length": 124 + }, + { + "text": "The study’s findings support theories that suggest these kinds of isolated objects are more common than currently believed.", + "length": 125 + }, + { + "text": "It is between 50 and 120 million years old, with a temperature of approximately 400 degrees celsius, and weighs four to seven times that of Jupiter.", + "length": 148 + }, + { + "text": "The isolated planet, which astronomers believe may have been flung away during its formation, is not tied by gravity to a star and in 100 light years away.", + "length": 155 + }, + { + "text": "Astrophysicist Ms Artigau, who also worked on the study, said: 'This object was discovered during a scan that covered the equivalent of 1,000 times the surface of the full moon.", + "length": 177 + }, + { + 
"text": "Itienne Artigau, an astrophysicist at UdeM, said: 'Although theorists had established the existence of this type of very cold and young planet, one had never been observed until today.", + "length": 184 + }, + { + "text": "Scientists have speculated on the existence of such a planet and have been trawling the night skies for more than a decade, although the hunt was described as looking for a ‘needle in a thousand haystacks’.", + "length": 210 + }, + { + "text": "'Over the past few years, several objects of this type have been identified, but their existence could not be established without scientific confirmation of their age,' said Jonathan Gagni, a doctoral student of physics at UdeM.", + "length": 228 + }, + { + "text": "University of Montreal (UdeM) researchers working with European colleagues and data provided by the Canada-France-Hawaii Telescope (CFHT) and the European Southern Observatory’s Very Large Telescope (VLT) came across the huge discovery.", + "length": 238 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4525875449180603 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:30.641747494Z", + "first_section_created": "2025-12-23T09:35:30.642036905Z", + "last_section_published": "2025-12-23T09:35:30.642210811Z", + "all_results_received": "2025-12-23T09:35:30.710511225Z", + "output_generated": "2025-12-23T09:35:30.710722833Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:30.642036905Z", + "publish_time": "2025-12-23T09:35:30.642210811Z", + "first_worker_start": "2025-12-23T09:35:30.642585826Z", + "last_worker_end": "2025-12-23T09:35:30.709536Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:30.642740132Z", + "start_time": 
"2025-12-23T09:35:30.642813034Z", + "end_time": "2025-12-23T09:35:30.642867036Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:30.642929Z", + "start_time": "2025-12-23T09:35:30.643092Z", + "end_time": "2025-12-23T09:35:30.709536Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:30.642619427Z", + "start_time": "2025-12-23T09:35:30.64269483Z", + "end_time": "2025-12-23T09:35:30.642758532Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:30.642532924Z", + "start_time": "2025-12-23T09:35:30.642585826Z", + "end_time": "2025-12-23T09:35:30.642610227Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + 
"average_section_size": 3050, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/0060736c50b5d3ab6563eb8041da2f037d8f923b.json b/data/output/0060736c50b5d3ab6563eb8041da2f037d8f923b.json new file mode 100644 index 0000000..2534ba8 --- /dev/null +++ b/data/output/0060736c50b5d3ab6563eb8041da2f037d8f923b.json @@ -0,0 +1,794 @@ +{ + "file_name": "0060736c50b5d3ab6563eb8041da2f037d8f923b.txt", + "total_words": 1807, + "top_n_words": [ + { + "word": "the", + "count": 100 + }, + { + "word": "to", + "count": 71 + }, + { + "word": "of", + "count": 58 + }, + { + "word": "a", + "count": 49 + }, + { + "word": "he", + "count": 34 + }, + { + "word": "and", + "count": 29 + }, + { + "word": "in", + "count": 28 + }, + { + "word": "for", + "count": 26 + }, + { + "word": "be", + "count": 23 + }, + { + "word": "as", + "count": 20 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "He .", + "length": 4 + }, + { + "text": "Fiji.", + "length": 5 + }, + { + "text": "time.", + "length": 5 + }, + { + "text": "Tory .", + "length": 6 + }, + { + "text": "agenda.", + "length": 7 + }, + { + "text": "' The .", + "length": 7 + }, + { + "text": "well’.", + "length": 8 + }, + { + "text": "' 'The .", + "length": 8 + }, + { + "text": "Daybreak.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "lobbyists.", + "length": 10 + }, + { + "text": "” ’.", + "length": 10 + }, + { + "text": "‘We are .", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "And he said he .", + "length": 16 + }, + { + "text": "Former Cabinet .", + "length": 16 + }, + { + "text": "He was recorded .", + "length": 17 + }, + { + "text": "Queen’s Speech.", + "length": 17 + }, + { + "text": "British politics'.", + "length": 18 + }, + { + "text": "The Sunday Times .", + "length": 18 + }, + { + "text": "Labour has done so.", + "length": 19 + }, + { + 
"text": "figure considerably.", + "length": 20 + }, + { + "text": "He told undercover .", + "length": 20 + }, + { + "text": "18:09 EST, 2 June 2013 .", + "length": 24 + }, + { + "text": "12:30 EST, 3 June 2013 .", + "length": 24 + }, + { + "text": "you host a function for me?", + "length": 27 + }, + { + "text": "When calculating the campaign .", + "length": 31 + }, + { + "text": "wanted them to,' Mr Clegg said.", + "length": 31 + }, + { + "text": "Framwellgate denied wrongdoing.", + "length": 31 + }, + { + "text": "And unions appear to be the main .", + "length": 34 + }, + { + "text": "The Deputy Prime Minister admitted .", + "length": 36 + }, + { + "text": "Voters will be able to sack sleazy .", + "length": 36 + }, + { + "text": "'Clearly there are instances where .", + "length": 36 + }, + { + "text": "The coalition will press ahead with .", + "length": 37 + }, + { + "text": "In what was branded a ‘new low for .", + "length": 38 + }, + { + "text": "Lord Laird told undercover reporters .", + "length": 38 + }, + { + "text": "The reforms will ensure that the true .", + "length": 39 + }, + { + "text": "Lord Mackenzie, a former president of .", + "length": 39 + }, + { + "text": "Westminster the overhaul it really needs.", + "length": 41 + }, + { + "text": "But they and Ulster Unionist Lord Laird .", + "length": 41 + }, + { + "text": "parliamentary business in exchange for cash.", + "length": 44 + }, + { + "text": "antidote,' Mr Clegg wrote in the Daily Telegraph.", + "length": 49 + }, + { + "text": "in the year before a general election, said No 10.", + "length": 50 + }, + { + "text": "those making major donations of more than £100,000.", + "length": 52 + }, + { + "text": "‘bribe’ in order to get others to join the group.", + "length": 53 + }, + { + "text": "for paying clients on the terrace of the House of Lords.", + "length": 56 + }, + { + "text": "scandal escalated yesterday after Labour suspended two .", + "length": 56 + }, + { + "text": "MPs and lobbyists will be 
regulated, Mr Clegg vowed as he .", + "length": 59 + }, + { + "text": "saying: ‘I’ll deny having said this, but it’s a bribe.", + "length": 60 + }, + { + "text": "to senior members of all three of the main political parties.", + "length": 61 + }, + { + "text": "the Ulster Unionists after they were recorded offering to do .", + "length": 62 + }, + { + "text": "powers were not in place by the next general election in 2015.", + "length": 62 + }, + { + "text": "He claims he ‘quickly became suspicious’ of the journalists.", + "length": 64 + }, + { + "text": "insisted the government would act to 'clean up' British politics.", + "length": 65 + }, + { + "text": "lobbyists representing a fake solar energy firm by pushing their .", + "length": 66 + }, + { + "text": "could get around Lords rules requiring peers to declare financial .", + "length": 67 + }, + { + "text": "He indicated that he would use the promise of a trip to Fiji as a .", + "length": 67 + }, + { + "text": "released a video of Lord Cunningham saying he would accept fees of .", + "length": 68 + }, + { + "text": "our institutions – and greater transparency is a key part of the .", + "length": 68 + }, + { + "text": "set to legislate to create a statutory register of lobbyists in this .", + "length": 70 + }, + { + "text": "misconduct could be forced to resign if enough of their constituents .", + "length": 70 + }, + { + "text": "peers for offering to assist undercover reporters who were posing as .", + "length": 70 + }, + { + "text": "which will apply to organisations affiliated to political parties as .", + "length": 70 + }, + { + "text": "the high life – and former senior police officer Lord Mackenzie of .", + "length": 70 + }, + { + "text": "session of Parliament – even though it was dropped from the recent .", + "length": 70 + }, + { + "text": "against the rules, parliamentary rules, in any event,' he told ITV's .", + "length": 70 + }, + { + "text": "be interested in accepting a retainer of £2,000 a month to 
lobby for .", + "length": 71 + }, + { + "text": "promised plans for a power of recall, 'where any MP guilty of serious .", + "length": 71 + }, + { + "text": "case at all because what’s alleged to have happened would have been .", + "length": 71 + }, + { + "text": "statutory register of lobbyists would have made no difference in this .", + "length": 71 + }, + { + "text": "as staffing and rent on premises - which could potentially inflate the .", + "length": 72 + }, + { + "text": "value of activities such as leaflet-printing is reflected when judging .", + "length": 72 + }, + { + "text": "I informed them the next day that I wanted nothing more to do with them.", + "length": 72 + }, + { + "text": "Cabinet Office Minister Francis Maude yesterday said the Government is .", + "length": 72 + }, + { + "text": "spending they must declare, organisations such as unions would have to .", + "length": 72 + }, + { + "text": "was also embroiled in a second, separate sting operation after telling .", + "length": 72 + }, + { + "text": "other’s clients, put down amendments in debates or write to ministers.", + "length": 72 + }, + { + "text": "whether parties have breached the £19 million cap on campaign spending .", + "length": 73 + }, + { + "text": "include not only the cost of printing a leaflet but also overheads such .", + "length": 73 + }, + { + "text": "their behalf, and that he could ‘get other people to ask questions as .", + "length": 73 + }, + { + "text": "the two years left before the next election was not long enough to give .", + "length": 73 + }, + { + "text": "that, working together, peers could ask parliamentary questions for each .", + "length": 74 + }, + { + "text": "target of the proposed changes to third-party election campaign funding, .", + "length": 74 + }, + { + "text": "the Police Superintendents’ Association, said he could arrange parties .", + "length": 74 + }, + { + "text": "reporters from BBC Panorama, who were posing as lobbyists, that he would .", + 
"length": 74 + }, + { + "text": "were all recorded discussing how, in return for payment, they could help .", + "length": 74 + }, + { + "text": "access is abused – further undermining the already weak public trust in .", + "length": 75 + }, + { + "text": "British politics’, Labour kicked out two peers and a third resigned from .", + "length": 76 + }, + { + "text": "interests by asking ‘a colleague who has nothing to do with it, “would .", + "length": 76 + }, + { + "text": "reporters he offered ‘value for money’ because he could introduce them .", + "length": 76 + }, + { + "text": "There will also be a crackdown on lobbyists, including a statutory register.", + "length": 76 + }, + { + "text": "£12,000 a month – £144,000 a year – to ask parliamentary questions on .", + "length": 77 + }, + { + "text": "minister Lord Cunningham – once known as ‘Junket Jack’ for his love of .", + "length": 78 + }, + { + "text": "He added: ‘I told them I always stick to the rules and declare any interests.", + "length": 79 + }, + { + "text": "going to do this,’ he said, adding that he would be ‘astonished’ if the .", + "length": 79 + }, + { + "text": "’ Lord Mackenzie said he was ‘quite happy’ that he had not broken the rules.", + "length": 82 + }, + { + "text": "Mr Mercer said he agreed to be a consultant for work he said was outside parliament.", + "length": 84 + }, + { + "text": "Matt Chorley, Mailonline Political Editor and Tim Shipman, Deputy Political Editor .", + "length": 84 + }, + { + "text": "As the MP signs the contract he is recorded saying: 'Let's sign this, with pleasure.", + "length": 84 + }, + { + "text": "'That is a lobbying operation to stop us cracking down properly on the Cayman Islands.", + "length": 86 + }, + { + "text": "The MP allegedly signs a contract with the fake lobbying company set up by BBC Panorama .", + "length": 89 + }, + { + "text": "Deputy Prime Minister Nick Clegg today promised to act to deal with the 'murkier side of .", + "length": 90 + }, + { + 
"text": "Mr Clegg said earlier that fresh allegations of wrongdoing highlighted the need for action.", + "length": 91 + }, + { + "text": "Lord Oakeshott called for an end to foreign governments bankrolling All Party Parliamentary Groups.", + "length": 99 + }, + { + "text": "Deputy Prime Minister Nick Clegg promised action to clean up 'murkier' aspects of British politics .", + "length": 100 + }, + { + "text": "The crackdown comes after three members of the House of Lords were embroiled in a new sleaze scandal.", + "length": 101 + }, + { + "text": "He later told MailOnline: 'I don't think we should have these groups paid for by foreign governments.", + "length": 101 + }, + { + "text": "Leader of the House Lord Hill of Oareford described the allegations as 'very serious and distressing'.", + "length": 102 + }, + { + "text": "' He singled out the Cayman Islands group, which receives support from the government in the renowned tax haven.", + "length": 112 + }, + { + "text": "Lord Cunningham referred his own case to the Lords Commissioner for Standards and said he was consulting lawyers.", + "length": 113 + }, + { + "text": "But today Mr Maude admitted the register would not have prevented the cases which have made headlines in recent days.", + "length": 117 + }, + { + "text": "The House of Lords is a 'stinking swamp' of corruption dominated by 'dirty money', a senior Lib Dem peer claimed today.", + "length": 119 + }, + { + "text": "He said a register of lobbyists would be useful 'as far as it goes' but Lords and party funding reform was also needed.", + "length": 119 + }, + { + "text": "However, Labour dismissed the plans as 'shabby' attempt to deflect attention from Tory embarrassment over the lobbying scandal.", + "length": 127 + }, + { + "text": "But he submitted five parliamentary questions which were all answered, as well as an early day motion, all in relation to Fiji.", + "length": 127 + }, + { + "text": "'It's the political equivalent of groundhog day: MPs 
accused of abusing their position; businesses of getting too close,' he said.", + "length": 130 + }, + { + "text": "’ Patrick Mercer MP has resigned as images of him meeting an undercover BBC Panorama reporter posing as a lobbyist were released .", + "length": 132 + }, + { + "text": "Shadow Defence Secretary Jim Murphy said the latest claims were ‘a new low for British politics’ and voters would be ‘sickened’.", + "length": 136 + }, + { + "text": "'This seems to be a shabby and panicked response by David Cameron to divert attention from a set of damaging headlines hitting the Conservative Party.", + "length": 150 + }, + { + "text": "Confrontation: The move to use new lobbying laws to target trade unions, which have opposed government spending cuts, is likely to prove controversial .", + "length": 152 + }, + { + "text": "' He said rules must be changed to allow disgraced peers to be suspended for up to 10 years and called for all-party parliamentary groups to be reviewed.", + "length": 153 + }, + { + "text": "A Labour source: 'The best way to proceed if you want to take big money out of politics and clean up the lobbying scandal is to act on a cross-party basis.", + "length": 155 + }, + { + "text": "Last night it was suggested that a legal complaint is expected to be issued to the police so Mr Mercer can be investigated for breaches of the Bribery Act.", + "length": 155 + }, + { + "text": "Under the new proposals, unions will be required to carry out an annual audit of their membership and demonstrate that the figures they produce are accurate.", + "length": 157 + }, + { + "text": "‘I went back to my office, I checked the codes of conduct and  I decided that it was getting a bit near the mark and I decided to decline the offer,’ he said.", + "length": 163 + }, + { + "text": "The Certification Officer will be given the power to conduct investigations into the numbers produced, which are vital when ballots on strike action are conducted.", + "length": 163 + }, + { + 
"text": "The coalition is to go to war with the trade unions, using reforms in the wake of the lobbying scandal to open up the membership of Labour's biggest financial backers.", + "length": 167 + }, + { + "text": "'All-party groups like that make it more difficult for HMRC and the British government to collect the full amount of tax and make sure foreign companies pay the right tax.", + "length": 171 + }, + { + "text": "But the new laws will also include changes to election funding rules and tougher checks on union membership in an attempt to curb the influence of unions on British politics.", + "length": 174 + }, + { + "text": "But the bill creating a lobbyists register will also include measures to end self-certification of union membership and reform third-party contributions to election campaigns.", + "length": 175 + }, + { + "text": "Today it was announced that the three peers will be investigated by the House of Lords standards commissioner has begun investigating the three peers caught up in a lobbying scandal.", + "length": 182 + }, + { + "text": "The House of Lords code of conduct says peers ‘must not seek to profit from membership of the House by accepting or agreeing to accept payment for providing parliamentary advice or services’.", + "length": 195 + }, + { + "text": "Laird added: ‘I wish to make it clear that I did not agree to act as a paid advocate in any proceedings of the House, nor did I accept payment or other incentive or reward in return for providing parliamentary advice or services.", + "length": 231 + }, + { + "text": "’ That undercover operation – in tandem with a second national newspaper – was the same one that exposed Newark MP Patrick Mercer, who resigned from the Conservative Party on Friday after accepting £4,000 to ask parliamentary questions.", + "length": 243 + }, + { + "text": "‘The public, who have looked on with a sense of astonishment and with a sense that there is one rule for those who govern and another set of rules for 
those who are governed, will just be utterly sickened by it, and they are right to be sickened,’ he said.", + "length": 260 + }, + { + "text": "Downing Street revealed legislation will be rushed out before the summer recess, creating a new statutory register of lobbyists after an MP and three peers were embroiled in a new sleaze scandal after being recorded by undercover reporters offering to publicise causes in exchange for money.", + "length": 291 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6025386452674866 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:31.143213036Z", + "first_section_created": "2025-12-23T09:35:31.143738258Z", + "last_section_published": "2025-12-23T09:35:31.144290881Z", + "all_results_received": "2025-12-23T09:35:31.29424901Z", + "output_generated": "2025-12-23T09:35:31.29448592Z", + "total_processing_time_ms": 151, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 149, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:31.143738258Z", + "publish_time": "2025-12-23T09:35:31.143982668Z", + "first_worker_start": "2025-12-23T09:35:31.144448187Z", + "last_worker_end": "2025-12-23T09:35:31.218817Z", + "total_journey_time_ms": 75, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:31.144569892Z", + "start_time": "2025-12-23T09:35:31.144656596Z", + "end_time": "2025-12-23T09:35:31.1447589Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:31.144871Z", + "start_time": "2025-12-23T09:35:31.145024Z", + "end_time": "2025-12-23T09:35:31.218817Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 73 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:31.14449929Z", + "start_time": "2025-12-23T09:35:31.144569592Z", + "end_time": 
"2025-12-23T09:35:31.144708398Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:31.144373584Z", + "start_time": "2025-12-23T09:35:31.144448187Z", + "end_time": "2025-12-23T09:35:31.144487089Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:31.14403917Z", + "publish_time": "2025-12-23T09:35:31.144207077Z", + "first_worker_start": "2025-12-23T09:35:31.144695898Z", + "last_worker_end": "2025-12-23T09:35:31.254433Z", + "total_journey_time_ms": 110, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:31.144689897Z", + "start_time": "2025-12-23T09:35:31.1447514Z", + "end_time": "2025-12-23T09:35:31.144924407Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:31.145936Z", + "start_time": "2025-12-23T09:35:31.146073Z", + "end_time": "2025-12-23T09:35:31.254433Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 108 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:31.144823403Z", + "start_time": "2025-12-23T09:35:31.144902506Z", + "end_time": "2025-12-23T09:35:31.145008711Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:31.144646996Z", + "start_time": "2025-12-23T09:35:31.144695898Z", + "end_time": "2025-12-23T09:35:31.144733799Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 2, + "creation_time": "2025-12-23T09:35:31.144242679Z", + "publish_time": "2025-12-23T09:35:31.144290881Z", + "first_worker_start": "2025-12-23T09:35:31.1447525Z", + "last_worker_end": "2025-12-23T09:35:31.293439Z", + "total_journey_time_ms": 149, + "worker_timings": { + "sentencesort": { + "worker_type": 
"sentencesort", + "receive_time": "2025-12-23T09:35:31.144887806Z", + "start_time": "2025-12-23T09:35:31.144911907Z", + "end_time": "2025-12-23T09:35:31.144927807Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:31.22371Z", + "start_time": "2025-12-23T09:35:31.223832Z", + "end_time": "2025-12-23T09:35:31.293439Z", + "queue_wait_time_ms": 79, + "processing_time_ms": 69 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:31.14499611Z", + "start_time": "2025-12-23T09:35:31.145049712Z", + "end_time": "2025-12-23T09:35:31.145258021Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:31.144727399Z", + "start_time": "2025-12-23T09:35:31.1447525Z", + "end_time": "2025-12-23T09:35:31.1447627Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 3, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 3, + "total_processing_ms": 250, + "min_processing_ms": 69, + "max_processing_ms": 108, + "avg_processing_ms": 83, + "median_processing_ms": 73, + "total_queue_wait_ms": 81, + "avg_queue_wait_ms": 27 + }, + "topn": { + "worker_type": "topn", + "total_sections": 3, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 3, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 3, + "average_section_size": 3621, + "slowest_section_id": 2, + "slowest_section_time_ms": 149 + } +} diff --git a/data/output/00611a031b1863643d38a007ed01ce5f40e88c79.json b/data/output/00611a031b1863643d38a007ed01ce5f40e88c79.json new file mode 100644 index 0000000..7b252d3 --- /dev/null +++ b/data/output/00611a031b1863643d38a007ed01ce5f40e88c79.json @@ -0,0 +1,496 @@ +{ + "file_name": "00611a031b1863643d38a007ed01ce5f40e88c79.txt", + "total_words": 1082, + "top_n_words": [ + { + "word": "the", + "count": 63 + }, + { + "word": "in", + "count": 41 + }, + { + "word": "and", + "count": 35 + }, + { + "word": "of", + "count": 26 + }, + { + "word": "to", + "count": 26 + }, + { + "word": "iraq", + "count": 24 + }, + { + "word": "said", + "count": 22 + }, + { + "word": "a", + "count": 21 + }, + { + "word": "s", + "count": 19 + }, + { + "word": "bush", + "count": 18 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "security needs.", + "length": 15 + }, + { + "text": "troops in Iraq.", + "length": 15 + }, + { + "text": "Bush said the U.", + "length": 16 + }, + { + "text": "Join The Forum .", + "length": 16 + }, + { + "text": "Some 4,500 other U.", + "length": 19 + }, + { + "text": "Debate the Iraq issue!", + "length": 22 + }, + { + "text": "troops home from Iraq.", + "length": 22 + }, + { + "text": "military officer in Iraq.", + "length": 25 + }, + { + "text": "Bush also emphasized the U.", + "length": 27 + }, + { + "text": "\"This is a vast country,\" he said.", + "length": 34 + }, + { + "text": "Democratic 
presidential nominee Sen.", + "length": 36 + }, + { + "text": "David Petraeus, the highest-ranking U.", + "length": 38 + }, + { + "text": "service members will go to Afghanistan.", + "length": 39 + }, + { + "text": "\" At present, there are about 146,000 U.", + "length": 40 + }, + { + "text": "Its democratic institutions are fragile.", + "length": 40 + }, + { + "text": "And if the progress in Iraq continues to hold, Gen.", + "length": 51 + }, + { + "text": "In his speech, Bush praised other members of the U.", + "length": 51 + }, + { + "text": "Watch Bush announce the troop reduction in Iraq » .", + "length": 52 + }, + { + "text": "Obama criticized the timing and scope of Bush's move.", + "length": 53 + }, + { + "text": "\"Yet they are all theaters in the same overall struggle.", + "length": 56 + }, + { + "text": "Democrats were less than enthusiastic about Bush's announcement.", + "length": 64 + }, + { + "text": "Bush described challenges in Afghanistan that don't exist in Iraq.", + "length": 66 + }, + { + "text": "troops from \"less than 21,000 two years ago to nearly 31,000 today.", + "length": 67 + }, + { + "text": "And in February of 2009, another Army combat brigade will come home.", + "length": 68 + }, + { + "text": "President Bush said Tuesday that he soon will start bringing some U.", + "length": 68 + }, + { + "text": "\" Skelton said Iraq \"cannot continue to overshadow other critical U.", + "length": 68 + }, + { + "text": "Ike Skelton, D-Missouri, chairman of the House Armed Services Committee.", + "length": 72 + }, + { + "text": "The source said five people saw the plan before it went to the president.", + "length": 73 + }, + { + "text": "An adviser to Iraqi Prime Minister Nuri al-Maliki welcomed Bush's decision.", + "length": 75 + }, + { + "text": "\"Unlike Iraq, it has few natural resources and has an underdeveloped infrastructure.", + "length": 84 + }, + { + "text": "And its enemies are some of the most hardened terrorists and extremists in the 
world.", + "length": 85 + }, + { + "text": "\"More significant troop reductions in Iraq are needed so that we can start to rebuild U.", + "length": 88 + }, + { + "text": "Through early next year, about 8,000 American troops will leave Iraq and not be replaced.", + "length": 89 + }, + { + "text": "Bush said stepped-up insurgent efforts in Afghanistan have necessitated the increase of U.", + "length": 90 + }, + { + "text": "\"By November, we will bring home a Marine battalion that is now serving in Anbar province.", + "length": 90 + }, + { + "text": "\"This amounts to about 8,000 additional American troops returning home without replacement.", + "length": 91 + }, + { + "text": "military readiness and provide the additional forces needed to finish the fight in Afghanistan.", + "length": 95 + }, + { + "text": "\"For all the good work we have done in that country, it is clear we must do even more,\" he said.", + "length": 96 + }, + { + "text": "Bush adopted the entire recommendation from Petraeus, a senior military official in Iraq told CNN.", + "length": 98 + }, + { + "text": "He said Australia has \"withdrawn its battle group\" and Polish troops are \"set to redeploy shortly.", + "length": 98 + }, + { + "text": "would make additional forces available in 2009 and called on allies to increase their force levels.", + "length": 99 + }, + { + "text": "-led coalition, saying many of those nations will be able to end their deployments to Iraq this year.", + "length": 101 + }, + { + "text": "\" \"The effort in Afghanistan must move to the forefront and once again become our top priority,\" he said.", + "length": 105 + }, + { + "text": "remarks just as strongly on Afghanistan, where al Qaeda and Taliban militants have been making a comeback.", + "length": 106 + }, + { + "text": "Petraeus and our military leaders believe additional reductions will be possible in the first half of 2009.", + "length": 107 + }, + { + "text": "intention to help Pakistan defeat insurgents who are 
using the country's tribal areas to stage attacks in Afghanistan.", + "length": 118 + }, + { + "text": "\" Bush said he is making the Iraqi troop withdrawal decision based on a recommendation from top military officers, including Gen.", + "length": 129 + }, + { + "text": "\" However, \"we will continue to spend $10 billion a month in Iraq while the Iraqi government sits on a $79 billion surplus,\" Obama said.", + "length": 136 + }, + { + "text": "The plan \"may seem to signal movement in the right direction,\" but it \"defers troop reductions until the next administration,\" said Rep.", + "length": 136 + }, + { + "text": "\"As we learned in Iraq, the best way to restore the confidence of the people is to restore basic security -- and that requires more troops.", + "length": 139 + }, + { + "text": "Barack Obama praised Bush for announcing additional troops for Afghanistan and \"moving in the direction of the policy that I have advocated for years.", + "length": 150 + }, + { + "text": "\"Iraq, Afghanistan and parts of Pakistan pose unique challenges for our country,\" Bush said Tuesday in a speech at the National Defense University in Washington.", + "length": 161 + }, + { + "text": "\" He said that a Marine battalion of around 1,000 will deploy to Afghanistan in November instead of Iraq and that an Army combat brigade of around 3,500 will go in January.", + "length": 172 + }, + { + "text": "\"His plan comes up short -- it is not enough troops, and not enough resources, with not enough urgency,\" the senator from Illinois said of Bush's call for more troops in Afghanistan.", + "length": 182 + }, + { + "text": "\"In the absence of a timetable to remove our combat brigades, we will continue to give Iraq's leaders a blank check instead of pressing them to reconcile their differences,\" he said.", + "length": 182 + }, + { + "text": "WASHINGTON (CNN) -- President Bush on Tuesday announced a troop deployment shift for America's two wars, a move that reflects a more stable 
Iraq and an increasingly volatile Afghanistan.", + "length": 186 + }, + { + "text": "\"These agreements will serve as the foundation for America's continued security support to Iraq once the United Nations resolution authorizing the multinational forces there expires on December 31.", + "length": 197 + }, + { + "text": "\" The president said Iraq and the United States will work \"toward the conclusion of a strategic framework agreement and a status of forces agreement,\" pacts that will spell out the terms of their relationship.", + "length": 209 + }, + { + "text": "\" He said these troop increases and those by allies, including Britain, France, Poland, Bulgaria, Romania, Australia, Germany, Denmark and the Czech Republic, have resulted in what he calls a \"quiet surge\" in Afghanistan.", + "length": 221 + }, + { + "text": "\"He and the Joint Chiefs of Staff have recommended that we move forward with additional force reductions,\" the president said, citing military and political strides in stabilizing the country and dramatically bringing down violence.", + "length": 232 + }, + { + "text": "\" He said Americans will help develop Afghan security forces and are improving efforts on the civilian side, adding more personnel to deal with issues of diplomacy, development, the rural economy and the fight against the drug trade.", + "length": 233 + }, + { + "text": "\"Over the next several months, we will bring home about 3,400 combat support forces -- including aviation personnel, explosive ordnance teams, combat and construction engineers, military police and logistical support forces,\" he said.", + "length": 234 + }, + { + "text": "\"We look at this step as a positive step that there is stability in Iraq, there is a real improvement in the security situation in Iraq and there is a real improvement in the capability of the Iraqi security forces in protecting and keeping the security in Iraq,\" said Sadiq al-Rikabi, al-Maliki's political adviser.", + "length": 316 + }, 
+ { + "text": "In explaining progress in the war effort, Bush cited the \"surge\" offensive, winning the hearts and minds of Sunni tribes, Iraqi political reconciliation efforts, economic improvements, an improved Iraqi army leading the fight against Shiite and Sunni insurgents, and a return of hundreds of doctors who fled the fighting.", + "length": 321 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.44855113327503204 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:31.645060184Z", + "first_section_created": "2025-12-23T09:35:31.645421899Z", + "last_section_published": "2025-12-23T09:35:31.645669109Z", + "all_results_received": "2025-12-23T09:35:31.763504304Z", + "output_generated": "2025-12-23T09:35:31.763760515Z", + "total_processing_time_ms": 118, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 117, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:31.645421899Z", + "publish_time": "2025-12-23T09:35:31.645614507Z", + "first_worker_start": "2025-12-23T09:35:31.646436641Z", + "last_worker_end": "2025-12-23T09:35:31.716398Z", + "total_journey_time_ms": 70, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:31.646370138Z", + "start_time": "2025-12-23T09:35:31.646436641Z", + "end_time": "2025-12-23T09:35:31.646537945Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:31.646646Z", + "start_time": "2025-12-23T09:35:31.646818Z", + "end_time": "2025-12-23T09:35:31.716398Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 69 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:31.646428141Z", + "start_time": "2025-12-23T09:35:31.646495744Z", + "end_time": "2025-12-23T09:35:31.646586047Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + 
"wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:31.646328937Z", + "start_time": "2025-12-23T09:35:31.646475743Z", + "end_time": "2025-12-23T09:35:31.646521545Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:31.645634108Z", + "publish_time": "2025-12-23T09:35:31.645669109Z", + "first_worker_start": "2025-12-23T09:35:31.646507144Z", + "last_worker_end": "2025-12-23T09:35:31.76261Z", + "total_journey_time_ms": 116, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:31.646466542Z", + "start_time": "2025-12-23T09:35:31.646546646Z", + "end_time": "2025-12-23T09:35:31.646597348Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:31.646709Z", + "start_time": "2025-12-23T09:35:31.646829Z", + "end_time": "2025-12-23T09:35:31.76261Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 115 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:31.646557746Z", + "start_time": "2025-12-23T09:35:31.646602948Z", + "end_time": "2025-12-23T09:35:31.646670251Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:31.646469743Z", + "start_time": "2025-12-23T09:35:31.646507144Z", + "end_time": "2025-12-23T09:35:31.646521545Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 184, + "min_processing_ms": 69, + 
"max_processing_ms": 115, + "avg_processing_ms": 92, + "median_processing_ms": 115, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3278, + "slowest_section_id": 1, + "slowest_section_time_ms": 116 + } +} diff --git a/data/output/00611c530db65df5cd6f2cd4fc9307567bd6cfc1.json b/data/output/00611c530db65df5cd6f2cd4fc9307567bd6cfc1.json new file mode 100644 index 0000000..48cd999 --- /dev/null +++ b/data/output/00611c530db65df5cd6f2cd4fc9307567bd6cfc1.json @@ -0,0 +1,318 @@ +{ + "file_name": "00611c530db65df5cd6f2cd4fc9307567bd6cfc1.txt", + "total_words": 497, + "top_n_words": [ + { + "word": "a", + "count": 23 + }, + { + "word": "the", + "count": 22 + }, + { + "word": "is", + "count": 17 + }, + { + "word": "kitty", + "count": 16 + }, + { + "word": "she", + "count": 13 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "hello", + "count": 11 + }, + { + "word": "cat", + "count": 10 + }, + { + "word": "s", + "count": 8 + }, + { + "word": "for", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "Meow.", + "length": 5 + }, + { + "text": "Wrong.", + "length": 6 + }, + { + "text": "\" Whoa.", + "length": 7 + }, + { + "text": "She is a friend.", + "length": 16 + }, + { + "text": "What does it mean?", + "length": 18 + }, + { + "text": "How is it possible?", + "length": 19 + }, + { + "text": "But she is not a cat.", + "length": 21 + }, + { + "text": "She is a little girl.", + "length": 21 + 
}, + { + "text": "\" tweeted @mrsunlawyer.", + "length": 23 + }, + { + "text": "\" @NotKennyRogers tweeted.", + "length": 26 + }, + { + "text": "\"Hello Kitty is not a cat.", + "length": 26 + }, + { + "text": "She's a cartoon character.", + "length": 26 + }, + { + "text": "\" Yes, she's also British.", + "length": 26 + }, + { + "text": "\"Hello Kitty is not a cat.", + "length": 26 + }, + { + "text": "\" She may have lost a few today.", + "length": 32 + }, + { + "text": "She's never depicted on all fours.", + "length": 34 + }, + { + "text": "She walks and sits like a two-legged creature.", + "length": 46 + }, + { + "text": "\"Since Hello Kitty isn't a cat, wtf is My Melody?", + "length": 49 + }, + { + "text": "\" At last count it was retweeted more than 13,000 times.", + "length": 56 + }, + { + "text": "Her favorite saying is \"You can never have too many friends.", + "length": 60 + }, + { + "text": "\" For some, the news raised more questions than it answered.", + "length": 60 + }, + { + "text": "\"I just got off stage to find out that Hello Kitty is not a cat.", + "length": 64 + }, + { + "text": "She does have a pet cat of her own, however, and it's called Charmmy Kitty.", + "length": 75 + }, + { + "text": "Hong Kong (CNN) -- Six simple words have sent Hello Kitty lovers into a spin.", + "length": 77 + }, + { + "text": "The story started innocently enough before the bombshell was dropped by Christine R.", + "length": 84 + }, + { + "text": "\"That's one correction Sanrio made for my script for the show,\" Yano told the LA Times.", + "length": 87 + }, + { + "text": "As the Sanrio website clearly states, Hello Kitty is a \"cheerful and happy little girl ...", + "length": 90 + }, + { + "text": "who lives in London with her mama (Mary White), papa (George White), and her twin sister Mimmy.", + "length": 95 + }, + { + "text": "\"Been tossing and turning for the last few hours trying to figure out how Hello Kitty isn't a cat.", + "length": 98 + }, + { + "text": "This is 
worse than finding out Pluto is not a planet,\" tweeted clearly shocked rapper Mike Shinoda.", + "length": 99 + }, + { + "text": "Until now, her pointy ears and whiskers gave her legion of fans the distinct impression she was feline.", + "length": 103 + }, + { + "text": "The news reached far and wide, including backstage after the Linkin Park gig at the Minnesota State Fair.", + "length": 105 + }, + { + "text": "\" Singer Katy Perry stepped in to try to calm the masses: \"IT'S OKAY HELLO KITTY FANS, KITTY PURRY IS A CAT.", + "length": 108 + }, + { + "text": "For the record, Kitty's birthday is November 1, she likes baking and making pancakes, origami and eating apple pie.", + "length": 115 + }, + { + "text": "Yano, an anthropologist at the University of Hawaii, who has delved more deeply than most into the Hello Kitty phenomenon.", + "length": 122 + }, + { + "text": "In the last 40 years her button nose has appeared on a dazzling array of merchandise, generating billions of dollars for the company.", + "length": 133 + }, + { + "text": "For those who don't know, Hello Kitty is an international superstar who was introduced to the world in 1974 by Japanese company Sanrio.", + "length": 135 + }, + { + "text": "Summing up the disbelief, @jkltoraay tweeted: \"You cannot say hello kitty is not a cat after 40 years no human has whiskers and pointed ears and a little yellow nose.", + "length": 166 + }, + { + "text": "Users raced to update Kitty's Wikipedia entry, which now reads: \"She bears the appearance of a white Japanese bobtail cat with a red bow although she is actually a little girl.", + "length": 176 + }, + { + "text": "\" The apparently shocking revelation was made in an LA Times article published Wednesday about a retrospective of Kitty paraphernalia opening next month at the Japanese American National Museum.", + "length": 194 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5471940040588379 + }, + "name_replaced": false, + "performance": { + 
"file_detection_time": "2025-12-23T09:35:32.146832129Z", + "first_section_created": "2025-12-23T09:35:32.147178343Z", + "last_section_published": "2025-12-23T09:35:32.147370251Z", + "all_results_received": "2025-12-23T09:35:32.208352584Z", + "output_generated": "2025-12-23T09:35:32.208539592Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:32.147178343Z", + "publish_time": "2025-12-23T09:35:32.147370251Z", + "first_worker_start": "2025-12-23T09:35:32.147904173Z", + "last_worker_end": "2025-12-23T09:35:32.207497Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:32.147958575Z", + "start_time": "2025-12-23T09:35:32.148022178Z", + "end_time": "2025-12-23T09:35:32.148089081Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:32.148158Z", + "start_time": "2025-12-23T09:35:32.148316Z", + "end_time": "2025-12-23T09:35:32.207497Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:32.147909973Z", + "start_time": "2025-12-23T09:35:32.147985777Z", + "end_time": "2025-12-23T09:35:32.14806088Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:32.14782887Z", + "start_time": "2025-12-23T09:35:32.147904173Z", + "end_time": "2025-12-23T09:35:32.147931174Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2759, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/00612571e10b2e5f4f92b410ee3b691a0f4eb2b3.json b/data/output/00612571e10b2e5f4f92b410ee3b691a0f4eb2b3.json new file mode 100644 index 0000000..c1f97e4 --- /dev/null +++ b/data/output/00612571e10b2e5f4f92b410ee3b691a0f4eb2b3.json @@ -0,0 +1,346 @@ +{ + "file_name": "00612571e10b2e5f4f92b410ee3b691a0f4eb2b3.txt", + "total_words": 819, + "top_n_words": [ + { + "word": "the", + "count": 61 + }, + { + "word": "a", + "count": 24 + }, + { + "word": "of", + "count": 21 + }, + { + "word": "in", + "count": 19 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "is", + "count": 14 + }, + { + "word": "now", + "count": 11 + }, + { + "word": "that", + "count": 11 + }, + { + "word": "egg", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "Why now?", + "length": 8 + }, + { + "text": "Ill effects?", + "length": 12 + }, + { + "text": "If 
there were risks, it didn't matter.", + "length": 38 + }, + { + "text": "'It was definitely the right thing to do.", + "length": 41 + }, + { + "text": "'I think it was the only thing that helped me.", + "length": 46 + }, + { + "text": "halted the use of the largely untested technique.", + "length": 49 + }, + { + "text": "However, the treatment has now been banned in the U.", + "length": 52 + }, + { + "text": "Today, Emma is a healthy teenage girl in high school.", + "length": 53 + }, + { + "text": "According to the Independent, Alana has not been tested.", + "length": 56 + }, + { + "text": "Before the technique could become more widespread, the U.", + "length": 57 + }, + { + "text": "Only two of those children, both now in their teens, have been named.", + "length": 69 + }, + { + "text": "The fertilized embryo is then transferred into the womb of the mother.", + "length": 70 + }, + { + "text": "But instead of boosting fertility, scientists in Britain as well as the U.", + "length": 74 + }, + { + "text": "I wanted a child too much at that point, Mrs Saarinen told The Independent.", + "length": 75 + }, + { + "text": "and is currently under investigation for possible health risks to the child.", + "length": 76 + }, + { + "text": "That left the egg carrying the DNA of three parents: donor, recipient and father.", + "length": 81 + }, + { + "text": "She sits at home with her mother Sharon, who says she does not regret the decision .", + "length": 84 + }, + { + "text": "About one in 200 children is born every year in Britain with a mitochondrial disorder.", + "length": 86 + }, + { + "text": "About one in 200 children is born every year in Britain with a mitochondrial disorder.", + "length": 86 + }, + { + "text": "In Emma, researchers said there was no lasting trace of the donor's mitochondrial DNA.", + "length": 86 + }, + { + "text": "' Mrs Saarinen's daughter Alana is now a healthy teenager who is doing well in high school.", + "length": 91 + }, + { + "text": "However, 
some of the some 30 three-parent babies conceived worldwide did retain a third DNA.", + "length": 92 + }, + { + "text": "A call made to the Institute for Reproductive Medicine and Science by MailOnline was not immediately returned.", + "length": 110 + }, + { + "text": "For a woman with faulty mitochondria, scientists take only the healthy genetic material from her egg or embryo.", + "length": 111 + }, + { + "text": "Before it was banned, the technique helped up to 30 women conceive, about 17 of which came from Saint Barnabas.", + "length": 111 + }, + { + "text": "Some groups oppose artificial reproduction techniques and believe the destruction of eggs or embryos to be immoral.", + "length": 115 + }, + { + "text": "According to the Independent, neither has been checked for long-term health issues that could be related to the technique.", + "length": 122 + }, + { + "text": "What it also did was transfer the donor's mitochondrial DNA--which is passed along only by mothers--into the fertilized egg.", + "length": 124 + }, + { + "text": "'The right thing to do': Alana Saarinen (right) was conceived through a controversial technique called cytoplasmic transfer.", + "length": 124 + }, + { + "text": "But there are concerns that the treatment compromises the health of the child, many of which inherit DNA from three parents.", + "length": 124 + }, + { + "text": "Now, well over a decade later, the clinic is reportedly looking into long-term effects on the children for the very first time .", + "length": 128 + }, + { + "text": "now want to transfer mitochondrial DNA as a means of preventing mitochondrial diseases from being passed on from mother to child.", + "length": 129 + }, + { + "text": "They then transfer that into a donor egg or embryo that still has its healthy mitochondria but has had the rest of its key DNA removed.", + "length": 135 + }, + { + "text": "A mother has defended her choice to undergo a controversial fertility treatment that helped her get pregnant with 
her now-teenage daughter.", + "length": 139 + }, + { + "text": "Babies born with the help of a New Jersey fertility clinic using a technique that put the genes of three parents into an egg are now teenagers .", + "length": 144 + }, + { + "text": "Sharon Saarinen became pregnant via a process called cytoplasmic transfer, which involves injected a donor egg into the mother's egg to boost the egg's health.", + "length": 159 + }, + { + "text": "Around 17 babies were conceived at the Institute for Reproductive Medicine and Science at Saint Barnabas Medical Center using cytoplasmic transfer, a now banned technique.", + "length": 172 + }, + { + "text": "Reports of their investigation into children like Emma and Alana come just as Britain is considering legalizing more advanced and refined three-parent conception techniques.", + "length": 173 + }, + { + "text": "One of them Alana and the other is Emma Ott, a Pittsburgh girl who became the first ever child born after being conceived by the 3-parent technique when she arrived happy and healthy in 1997.", + "length": 191 + }, + { + "text": "The Institute for Reproductive Medicine and Science at Saint Barnabas Medical Center in New Jersey is now investigating what - if any - are the ill effects of the now-halted fertility technique .", + "length": 195 + }, + { + "text": "At the time Alana and Emma were conceived, scientists were using cytoplasmic transfer in the hopes it would increase the chances of successful conception in patients for whom traditional IVF had been unsuccessful.", + "length": 213 + }, + { + "text": "The new techniques help women with faulty mitochondria, the energy source in a cell, from passing on to their babies defects that can result in such diseases as muscular dystrophy, epilepsy, heart problems and mental retardation.", + "length": 229 + }, + { + "text": "Hopeful: The new techniques help women with faulty mitochondria, the energy source in a cell, from passing on to their babies defects that can 
result in such diseases as muscular dystrophy, epilepsy, heart problems and mental retardation.", + "length": 238 + }, + { + "text": "British tabloids jumped on the procedure when it was first announced in 2008 and labeled it the creation of a three-parent baby — the mother, the donor and the father — a charge scientists claim is inaccurate because the amount of DNA from the donor egg is insignificant.", + "length": 275 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.46861889958381653 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:32.648164455Z", + "first_section_created": "2025-12-23T09:35:32.648488069Z", + "last_section_published": "2025-12-23T09:35:32.648700677Z", + "all_results_received": "2025-12-23T09:35:32.710943163Z", + "output_generated": "2025-12-23T09:35:32.711120971Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:32.648488069Z", + "publish_time": "2025-12-23T09:35:32.648700677Z", + "first_worker_start": "2025-12-23T09:35:32.649391106Z", + "last_worker_end": "2025-12-23T09:35:32.710072Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:32.649317603Z", + "start_time": "2025-12-23T09:35:32.649410707Z", + "end_time": "2025-12-23T09:35:32.649520812Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:32.649542Z", + "start_time": "2025-12-23T09:35:32.649702Z", + "end_time": "2025-12-23T09:35:32.710072Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:32.649319003Z", + "start_time": "2025-12-23T09:35:32.649391106Z", + "end_time": "2025-12-23T09:35:32.64948271Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:32.649325903Z", + "start_time": "2025-12-23T09:35:32.649391306Z", + "end_time": "2025-12-23T09:35:32.649428808Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4891, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/006129dc2d2800b0dfb9b61501fb82c9aecf6bad.json b/data/output/006129dc2d2800b0dfb9b61501fb82c9aecf6bad.json new file mode 100644 index 0000000..1d40754 --- /dev/null +++ b/data/output/006129dc2d2800b0dfb9b61501fb82c9aecf6bad.json @@ -0,0 +1,704 @@ +{ + "file_name": "006129dc2d2800b0dfb9b61501fb82c9aecf6bad.txt", + "total_words": 1721, + "top_n_words": [ + { + "word": "the", + "count": 104 + }, + { + "word": "to", + "count": 47 + }, + { + "word": 
"and", + "count": 37 + }, + { + "word": "arsenal", + "count": 32 + }, + { + "word": "a", + "count": 28 + }, + { + "word": "for", + "count": 27 + }, + { + "word": "in", + "count": 27 + }, + { + "word": "of", + "count": 22 + }, + { + "word": "but", + "count": 21 + }, + { + "word": "his", + "count": 19 + } + ], + "sorted_sentences": [ + { + "text": "Campbell.", + "length": 9 + }, + { + "text": "Away we go!", + "length": 11 + }, + { + "text": "Arsenal: TBA .", + "length": 14 + }, + { + "text": "What a start...", + "length": 15 + }, + { + "text": "Besiktas: TBA .", + "length": 15 + }, + { + "text": "45mins: Oh dear.", + "length": 16 + }, + { + "text": "Away we go again.", + "length": 17 + }, + { + "text": "Host commentator .", + "length": 18 + }, + { + "text": "That's some record.", + "length": 19 + }, + { + "text": "not taking pictures.", + "length": 20 + }, + { + "text": "but his shot is blocked.", + "length": 24 + }, + { + "text": "Hit them where it hurts!", + "length": 24 + }, + { + "text": "Demba Ba with the effort.", + "length": 25 + }, + { + "text": "Time to prove your point!", + "length": 25 + }, + { + "text": "And off the bar immediately!", + "length": 28 + }, + { + "text": "Well that was different, eh?", + "length": 28 + }, + { + "text": "47mins: Arsenal carved open.", + "length": 28 + }, + { + "text": "It hasn't been Olivier's half.", + "length": 30 + }, + { + "text": "But here's Arsenal on the break!", + "length": 32 + }, + { + "text": "Right decision from the referee.", + "length": 32 + }, + { + "text": "No shot from kick-off this time.", + "length": 32 + }, + { + "text": "Can they start on the front foot?", + "length": 33 + }, + { + "text": "Seems they just have to be patient.", + "length": 35 + }, + { + "text": "So much for entertainment, Arsenal...", + "length": 37 + }, + { + "text": "38mins: Great effort from Jack Wilshere.", + "length": 40 + }, + { + "text": "That was heading towards the top corner.", + "length": 40 + }, + { + "text": "Like for like 
swap with the Ox coming on.", + "length": 41 + }, + { + "text": "Looking really dangerous here, the hosts.", + "length": 41 + }, + { + "text": "And the atmosphere is incredible in Istanbul.", + "length": 45 + }, + { + "text": "80mins: Well this makes things interesting...", + "length": 45 + }, + { + "text": "Arsene Wenger's side struggling to get going.", + "length": 45 + }, + { + "text": "The keeper does well to dive and save and hold.", + "length": 47 + }, + { + "text": "Everything about him is measured and controlled.", + "length": 48 + }, + { + "text": "Arsenal in the ascendancy as the hosts sit back.", + "length": 48 + }, + { + "text": "5mins: All over the place at the minute Arsenal.", + "length": 48 + }, + { + "text": "' Arsenal fans will hope they do that quickly...", + "length": 48 + }, + { + "text": "13mins: But this is much better from Arsenal now.", + "length": 49 + }, + { + "text": "44mins: Rare lack of composure from Calum Chambers.", + "length": 51 + }, + { + "text": "Perhaps they should ask Liverpool for some advice...", + "length": 52 + }, + { + "text": "Final whistle goes, sorry about the lack of goals...", + "length": 52 + }, + { + "text": "Arsenal will surely be happy enough with a draw here.", + "length": 53 + }, + { + "text": "Chambers so full of composure for Arsenal at the back.", + "length": 54 + }, + { + "text": "It may be tough tonight though at the Olympic Stadium.", + "length": 54 + }, + { + "text": "55mins: It's almost like his job is collecting yellows.", + "length": 55 + }, + { + "text": "Unfortunately, it was like that in my case,’ he said.", + "length": 55 + }, + { + "text": "59mins: Neither team committing too much at the moment.", + "length": 55 + }, + { + "text": "30mins: Slight appeal for a penalty there for Besiktas.", + "length": 55 + }, + { + "text": "A shot straight from kick-off troubles Wojciech Szczesny.", + "length": 57 + }, + { + "text": "Arsenal’s midfield being over-run in the opening spell.", + "length": 57 + }, + { 
+ "text": "Few shots from long range here and there, but little else.", + "length": 58 + }, + { + "text": "Besiktas quicker and sharper to the ball, more aggressive.", + "length": 58 + }, + { + "text": "' We know what Arsenal can bring, but what about Besiktas?", + "length": 58 + }, + { + "text": "12mins: Seems to be so much space for Arsenal on the right.", + "length": 59 + }, + { + "text": "25mins: Plenty of whistles as Arsenal try and build attacks.", + "length": 60 + }, + { + "text": "68mins: Finally Arsenal burst into life into the second half.", + "length": 61 + }, + { + "text": "18mins: This game settling down after a fast and frenetic start.", + "length": 64 + }, + { + "text": "Flamini replaces him, while Koscielny takes the captain's armband.", + "length": 66 + }, + { + "text": "88mins: That was absolutely brilliant from Alex Oxlade-Chamberlain.", + "length": 67 + }, + { + "text": "Mathieu Flamini goes into the book, a flurry of sorts at the moment.", + "length": 68 + }, + { + "text": "Arsenal would probably take the 0-0 if it were on offer at the moment.", + "length": 70 + }, + { + "text": "The hosts are playing nice quick football on this 'questionable' surface.", + "length": 73 + }, + { + "text": "10mins: Great ball from Ozyakup over the top of Calum Chambers to find Ba.", + "length": 74 + }, + { + "text": "Seems to be a thriller in Copenhagen too, with Bayer Leverkusen leading 3-2.", + "length": 76 + }, + { + "text": "Subs: Martinez, Bellerin, Miquel, Flamini, Rosicky, Oxlade-Chamberlain \u0026 J.", + "length": 76 + }, + { + "text": "' Arsenal have won all 12 of their previous Champions league play-off matches.", + "length": 78 + }, + { + "text": "83mins: Arsenal understandably sitting back now, its Besiktas' time to attack.", + "length": 78 + }, + { + "text": "Minutes later Aaron Ramsey pulls back Ozyakup and earns himself a cheap yellow.", + "length": 79 + }, + { + "text": "You would think Besiktas would have to come out and attack, being the home 
team.", + "length": 80 + }, + { + "text": "72mins: Second sub for Arsenal, as the hugely promising Alexis Sanchez goes off.", + "length": 80 + }, + { + "text": "The striker twists and turns and his shot wrong foots Szczesny, but it's just wide.", + "length": 83 + }, + { + "text": "This is his first Champions League game and he is clearly enjoying himself out there.", + "length": 85 + }, + { + "text": "Kerim Frei, formerly of Fulham, is on for Ozyakup who seemed to be losing his temper.", + "length": 85 + }, + { + "text": "They'll be back at the Emirates next week, and will be eager to make the group stage.", + "length": 85 + }, + { + "text": "It's been end-to-end in Istanbul, but both teams can't quite execute their final ball.", + "length": 86 + }, + { + "text": "Tolga, İsmail, Motta, Ersan, Pedro, Olcay, Veli, Necip, Oğuzhan, Mustafa, Demba Ba .", + "length": 86 + }, + { + "text": "Arsenal made it into half-time for refuge but the second-half has exactly the same feel.", + "length": 88 + }, + { + "text": "53mins: Lots of challenges going in so I'm surprised that's the first booking of the game.", + "length": 90 + }, + { + "text": "Chambers tracks Veli and doesn't get the ball, but the player wasn't going for it himself.", + "length": 90 + }, + { + "text": "who has been somewhat sluggish so far, nearly picks out Alexis Sanchez with a dinked cross.", + "length": 91 + }, + { + "text": "Motta cynically takes out Alexis Sanchez on the halfway line as the Chilean broke past him.", + "length": 91 + }, + { + "text": "Both teams had chances in an open game, while Alex Oxlade-Chamberlain hit the post late on.", + "length": 91 + }, + { + "text": "'It is good to see he has made it to the top level and is now an important player in Turkey.", + "length": 92 + }, + { + "text": "Athletic Bilbao have just taken a surprise lead away at Napoli, Iker Muniain with that goal.", + "length": 92 + }, + { + "text": "Credit to Mathieu Debuchy, who has been brilliant going forward, and in 
defence on the right.", + "length": 93 + }, + { + "text": "A beautiful chipped ball finds the Franchman but he misses his left-footed volley completely.", + "length": 93 + }, + { + "text": "50mins: Worry for Arsenal with Everton up next, skipper Mikel Arteta hobbles off with a knock.", + "length": 94 + }, + { + "text": "Sanchez bursts clear but his cross is deflected and Giroud can't quite get it out of his feet.", + "length": 94 + }, + { + "text": "One player to look out for is Oğuzhan Özyakup who is a former youth product in North London.", + "length": 96 + }, + { + "text": "The youngster attempts a crossfield pass to his centre-back partner, but it's intercepted by Ba.", + "length": 96 + }, + { + "text": "90mins: Late sub for Arsenal, who will be happy with this scoreline now, despite that near miss.", + "length": 96 + }, + { + "text": "Team news to come, of course, and we will all have all the updates in the lead up to the big game.", + "length": 98 + }, + { + "text": "Hello and welcome to Sportsmail's coverage as Arsenal travel to Istanbul to play Besiktas tonight.", + "length": 98 + }, + { + "text": "Szczesny; Debuchy, Koscielny, Chambers, Monreal; Arteta, Ramsey; Cazorla, Wilshere, Sanchez; Giroud.", + "length": 100 + }, + { + "text": "Speaking to ITV before the game: 'It's windy and pitch is not the best at all, we will have to adapt.", + "length": 101 + }, + { + "text": "Tugs back Ozyakup after losing possession, and now the visitors have a real challenge on their hands.", + "length": 101 + }, + { + "text": "Concrete information should arrive shortly, but we can gather certain aspects about Arsenal's line up.", + "length": 102 + }, + { + "text": "Great passing down the right between Sanchez and Debuchy, and the right-backs pull-back finds Giroud...", + "length": 103 + }, + { + "text": "Aaron Ramsey picked up a cheap first yellow for tugging back his opponent, and he makes the same mistake.", + "length": 105 + }, + { + "text": "They have also scored eight 
goals in their last two games in Istanbul, against Fenerbahce and Galatasaray.", + "length": 106 + }, + { + "text": "‘Sometimes when you are at a club with a lot of big stars, the name counts for more than the statistics.", + "length": 106 + }, + { + "text": "Alexis Sanchez really does look in the mood, while Calum Chambers has started well at the back for Arsenal.", + "length": 107 + }, + { + "text": "Sanchez again involved, his cut back finds Santi Cazorla, whose shot zips past the post from a tight angle.", + "length": 107 + }, + { + "text": "Alexis Sanchez will be looking to score his first goal for the club, and don't forget about Aaron Ramsey...", + "length": 107 + }, + { + "text": "This time Aaron Ramsey tries to break clear but the referee doesn't play advantage after a trip on Sanchez.", + "length": 107 + }, + { + "text": "Alexis Sanchez has looked lively for the visitors, while Olivier Giroud should really have opened the scoring.", + "length": 110 + }, + { + "text": "As expected Demba Ba starts for the hosts, looking to build on his two goals in five games against the Gunners.", + "length": 111 + }, + { + "text": "The Senegalese striker connects well with his left foot on the volley, but the goalkeeper does well to palm it away.", + "length": 116 + }, + { + "text": "For Arsenal, Olivier Giroud comes in for the injured Yaya Sanogo, and similarly Nacho Monreal replaces Kieran Gibbs.", + "length": 116 + }, + { + "text": "The Champions League is back, and it was so nearly back with a bang with Demba Ba's five-second shot off the bar from kick-off.", + "length": 127 + }, + { + "text": "The midfielder plays a one-two with Giroud, and after looking up to see no one in front of him, curls towards goal from 25 yards.", + "length": 129 + }, + { + "text": "The visitors were playing the latter period of the game with ten men after Aaron Ramsey was sent off for two petulant yellow cards.", + "length": 131 + }, + { + "text": "Arsene Wenger has become accustomed to 
experiencing the group stages of the Champions League, but faces a tricky first leg tonight.", + "length": 131 + }, + { + "text": "Arsenal have a holding midfielder on the bench in Mathieu Flamimi and it looks like they could do with him in the early part of this game.", + "length": 138 + }, + { + "text": "Wenger said: 'I always thought he could make a career but at our club he had big competition in front of him and that is why we let him go.", + "length": 139 + }, + { + "text": "Relive Sportsmail's coverage of the Champions League as Arsenal travelled to Istanbul for the first leg of their play-off against Besiktas.", + "length": 139 + }, + { + "text": "'We are not here to take selfies with Arsenal on the pitch, we are here to battle to the end,' Bilic said at his pre-match press conference.", + "length": 140 + }, + { + "text": "66mins: Monreal joins Ramsey and Flamini in the book for Arsenal as he catches Mustafa late as the midfielder tried to play Demba Ba through.", + "length": 141 + }, + { + "text": "Dribbling in from the right-hand side, the midfielder unleashes a shot on his weaker foot which the goalkeeper does well to tip onto the post.", + "length": 142 + }, + { + "text": "The Chilean struggled somewhat in the 2-1 victory against Crystal Palace at the weekend, and could be rested for this weekend's trip to Everton.", + "length": 144 + }, + { + "text": "Approaching the 70th minute, Wenger has the likes of Rosicky, Oxlade-Chamberlain and Joel Campbell available should he wish to change things up.", + "length": 144 + }, + { + "text": "Worryingly also, he says Koscielny declared himself fit: 'Koscielny has inflamation but he has declared himself fit, you have to trust the player.", + "length": 146 + }, + { + "text": "Lukas Podolski may not be here (though there is still Wojciech Szczesny), but Besiktas manager Slavan Bilic says his side will be battling on the pitch...", + "length": 154 + }, + { + "text": "Arsenal probably had the best of the chances 
tonight, hitting the post late on, but after Aaron Ramsey's sending off will have to take a draw away from home.", + "length": 157 + }, + { + "text": "Calum Chambers is expected to make his Champions League debut after impressing in recent weeks, while it will be interesting to see whether Alexis Sanchez starts.", + "length": 162 + }, + { + "text": "Demba Ba has spoken this week about how Jose Mourinho stopped his move to Arsenal last summer, and he now believes he didn't play for Chelsea as he wasn't a star name.", + "length": 167 + }, + { + "text": "Olcay is played in down the left with so much space to run in to - he cuts inside, opens up the shot into the far corner, but Szczesny will be relieved to see that one curl just wide.", + "length": 183 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4319429248571396 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:33.14945068Z", + "first_section_created": "2025-12-23T09:35:33.149859197Z", + "last_section_published": "2025-12-23T09:35:33.150328616Z", + "all_results_received": "2025-12-23T09:35:33.260377288Z", + "output_generated": "2025-12-23T09:35:33.260882509Z", + "total_processing_time_ms": 111, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 110, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:33.149859197Z", + "publish_time": "2025-12-23T09:35:33.150080106Z", + "first_worker_start": "2025-12-23T09:35:33.150688531Z", + "last_worker_end": "2025-12-23T09:35:33.258945Z", + "total_journey_time_ms": 109, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:33.150629629Z", + "start_time": "2025-12-23T09:35:33.150688531Z", + "end_time": "2025-12-23T09:35:33.150776135Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:33.150949Z", 
+ "start_time": "2025-12-23T09:35:33.1511Z", + "end_time": "2025-12-23T09:35:33.258945Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 107 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:33.150630629Z", + "start_time": "2025-12-23T09:35:33.150707332Z", + "end_time": "2025-12-23T09:35:33.150836337Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:33.150620428Z", + "start_time": "2025-12-23T09:35:33.150694731Z", + "end_time": "2025-12-23T09:35:33.150747734Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:33.150132408Z", + "publish_time": "2025-12-23T09:35:33.150328616Z", + "first_worker_start": "2025-12-23T09:35:33.150729033Z", + "last_worker_end": "2025-12-23T09:35:33.246788Z", + "total_journey_time_ms": 96, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:33.150816937Z", + "start_time": "2025-12-23T09:35:33.15089544Z", + "end_time": "2025-12-23T09:35:33.150997144Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:33.15098Z", + "start_time": "2025-12-23T09:35:33.15113Z", + "end_time": "2025-12-23T09:35:33.246788Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 95 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:33.150770835Z", + "start_time": "2025-12-23T09:35:33.150841138Z", + "end_time": "2025-12-23T09:35:33.150968043Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:33.150679831Z", + "start_time": "2025-12-23T09:35:33.150729033Z", + "end_time": "2025-12-23T09:35:33.150768034Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + 
"sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 202, + "min_processing_ms": 95, + "max_processing_ms": 107, + "avg_processing_ms": 101, + "median_processing_ms": 107, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4863, + "slowest_section_id": 0, + "slowest_section_time_ms": 109 + } +} diff --git a/data/output/0061446395a8011d5d924c63bd1f4c63d0f68404.json b/data/output/0061446395a8011d5d924c63bd1f4c63d0f68404.json new file mode 100644 index 0000000..6f43d87 --- /dev/null +++ b/data/output/0061446395a8011d5d924c63bd1f4c63d0f68404.json @@ -0,0 +1,198 @@ +{ + "file_name": "0061446395a8011d5d924c63bd1f4c63d0f68404.txt", + "total_words": 223, + "top_n_words": [ + { + "word": "the", + "count": 14 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "in", + "count": 7 + }, + { + "word": "said", + "count": 6 + }, + { + "word": "doctors", + "count": 5 + }, + { + "word": "fernandez", + "count": 5 + }, + { + "word": "s", + "count": 5 + }, + { + "word": "to", + "count": 5 + }, + { + "word": "and", + "count": 4 + }, + { + "word": "brain", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": 
"Spokesman Alfredo Scoccimarro said Saturday the president will suspend all her activities.", + "length": 90 + }, + { + "text": "A subdural hematoma is a blood clot on the brain's surface beneath its outer covering, called the dura.", + "length": 103 + }, + { + "text": "Often, in people over 60, a brain trauma can cause the blood vessels in the brain to tear, and blood to clot.", + "length": 109 + }, + { + "text": "Doctors at a Buenos Aires hospital discovered the hematoma on Saturday after a neurological evaluation, he said.", + "length": 112 + }, + { + "text": "A few days later, a spokesman for Fernandez said she did not actually have cancer and that doctors had discarded their original diagnosis.", + "length": 138 + }, + { + "text": "(CNN) -- Argentine President Cristina Fernandez de Kirchner was told to take a month off work after doctors diagnosed her with a subdural hematoma.", + "length": 147 + }, + { + "text": "Fernandez's health made headlines when she underwent surgery in January 2012 to remove her thyroid, after doctors said they detected cancer in the gland.", + "length": 153 + }, + { + "text": "The diagnosis and the doctor's recommendation mean Fernandez will be out of commission during the critical campaign season for congressional elections on October 27.", + "length": 165 + }, + { + "text": "In August, Fernandez, 60, suffered a cranial trauma, for which doctors conducted a brain scan and found normal results with no symptoms at the time, Scoccimarro said.", + "length": 166 + }, + { + "text": "According to Argentina's constitution, the vice president would assume the presidency temporarily in the president's absence, but officials have not said if that will occur in this situation.", + "length": 191 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7870411276817322 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:33.651178223Z", + "first_section_created": "2025-12-23T09:35:33.651498236Z", + 
"last_section_published": "2025-12-23T09:35:33.651692144Z", + "all_results_received": "2025-12-23T09:35:33.711722838Z", + "output_generated": "2025-12-23T09:35:33.711874044Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:33.651498236Z", + "publish_time": "2025-12-23T09:35:33.651692144Z", + "first_worker_start": "2025-12-23T09:35:33.652219866Z", + "last_worker_end": "2025-12-23T09:35:33.710809Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:33.652203765Z", + "start_time": "2025-12-23T09:35:33.652248367Z", + "end_time": "2025-12-23T09:35:33.652273968Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:33.6524Z", + "start_time": "2025-12-23T09:35:33.652538Z", + "end_time": "2025-12-23T09:35:33.710809Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:33.652181264Z", + "start_time": "2025-12-23T09:35:33.652247967Z", + "end_time": "2025-12-23T09:35:33.652283769Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:33.652173364Z", + "start_time": "2025-12-23T09:35:33.652219866Z", + "end_time": "2025-12-23T09:35:33.652235267Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 
1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1383, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git a/data/output/00614914a9f4bf01ac2d58a9185406c78b5d0406.json b/data/output/00614914a9f4bf01ac2d58a9185406c78b5d0406.json new file mode 100644 index 0000000..5bc1f4b --- /dev/null +++ b/data/output/00614914a9f4bf01ac2d58a9185406c78b5d0406.json @@ -0,0 +1,612 @@ +{ + "file_name": "00614914a9f4bf01ac2d58a9185406c78b5d0406.txt", + "total_words": 1250, + "top_n_words": [ + { + "word": "i", + "count": 47 + }, + { + "word": "the", + "count": 38 + }, + { + "word": "and", + "count": 37 + }, + { + "word": "a", + "count": 35 + }, + { + "word": "of", + "count": 34 + }, + { + "word": "to", + "count": 32 + }, + { + "word": "her", + "count": 28 + }, + { + "word": "she", + "count": 25 + }, + { + "word": "for", + "count": 20 + }, + { + "word": "josie", + "count": 20 + } + ], + "sorted_sentences": [ + { + "text": "co.", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "uk .", + "length": 4 + }, + { + "text": "Not .", + "length": 5 + }, + { + "text": "Josie .", + "length": 7 + }, + { + "text": "'I think .", + "length": 10 + }, + { + "text": "night out.", + "length": 10 + }, + { + "text": "put weight on.", + "length": 14 + 
}, + { + "text": "Prices range .", + "length": 14 + }, + { + "text": "ages and sizes.", + "length": 15 + }, + { + "text": "Bianca London .", + "length": 15 + }, + { + "text": "But I've eaten .", + "length": 16 + }, + { + "text": "Sometimes I eat .", + "length": 17 + }, + { + "text": "I've got to do it .", + "length": 19 + }, + { + "text": "to try out is Yoga.", + "length": 19 + }, + { + "text": "of weight loss for her.", + "length": 23 + }, + { + "text": "prefer the normal life.", + "length": 23 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "I came out of Big Brother.", + "length": 26 + }, + { + "text": "I've been like that since .", + "length": 27 + }, + { + "text": "and modelling it all herself.", + "length": 29 + }, + { + "text": "of six stone in just one year.", + "length": 30 + }, + { + "text": "women of all shapes and sizes.", + "length": 30 + }, + { + "text": "cakes today so I feel bad now.", + "length": 30 + }, + { + "text": "The Josie Gibson for Goddiva .", + "length": 30 + }, + { + "text": "'It's like I'm not living in a .", + "length": 32 + }, + { + "text": "so I think yoga would be good, .", + "length": 32 + }, + { + "text": "The collaboration will see Josie .", + "length": 34 + }, + { + "text": "I'm terrified of going back there.", + "length": 34 + }, + { + "text": "I'm terrified of going back there.", + "length": 34 + }, + { + "text": "'I can't believe that all this is .", + "length": 35 + }, + { + "text": "The 28-year-old was approached by .", + "length": 35 + }, + { + "text": "'I did put my heart and soul into it.", + "length": 37 + }, + { + "text": "From a fitted monochrome dress with .", + "length": 37 + }, + { + "text": "One type of exercise she would love .", + "length": 37 + }, + { + "text": "eight times a day but the right stuff.", + "length": 38 + }, + { + "text": "Speaking about her healthy new diet, .", + "length": 38 + }, + { + "text": "In May 2012 Josie was horrified when .", + "length": 38 + }, + { + 
"text": "my exercise has made me better in bed!", + "length": 38 + }, + { + "text": "Josie explained that exercise forms a .", + "length": 39 + }, + { + "text": "'It's a lot of pressure, imagine if I .", + "length": 39 + }, + { + "text": "keen to integrate into a new collection.", + "length": 40 + }, + { + "text": "'I love eating healthy, I love popping .", + "length": 40 + }, + { + "text": "which led her to lose the excess pounds.", + "length": 40 + }, + { + "text": "'First I want to help other people lose .", + "length": 41 + }, + { + "text": "links between her own website and Goddiva's.", + "length": 44 + }, + { + "text": "reality, I keep thinking I'm going to wake up.", + "length": 46 + }, + { + "text": "caught up in the fickle world of fame and fortune.", + "length": 50 + }, + { + "text": "She said: 'I can be quite highly strung sometimes .", + "length": 51 + }, + { + "text": "weight and then do a second collection with Goddiva.", + "length": 52 + }, + { + "text": "down to the farm shop and buying all the good stuff.", + "length": 52 + }, + { + "text": "she explained: 'I eat all the time but little snacks.", + "length": 53 + }, + { + "text": "Josie Gibson for Goddiva is available now at Goddiva.", + "length": 53 + }, + { + "text": "ex-BB housemate hopes her own range will be a similar hit.", + "length": 58 + }, + { + "text": "maybe Bikram Yoga, which apparently makes you better in bed.", + "length": 60 + }, + { + "text": "dress reminiscent of Moschino's spring/summer 13 catwalk look.", + "length": 62 + }, + { + "text": "She said: 'It's a lot of pressure, imagine if I put weight on.", + "length": 62 + }, + { + "text": "I do think she would be proud of me, I hope so anyway,' she said.", + "length": 65 + }, + { + "text": "'I don't really have any celebrity friends, I like normal people.", + "length": 65 + }, + { + "text": "panel, Josie's collection of dresses are perfect for any glamorous .", + "length": 68 + }, + { + "text": "collection combines Josie's love of 
fun prints, bodycon dresses and .", + "length": 69 + }, + { + "text": "Goddiva due to her entertaining, flirty personality, which they were .", + "length": 70 + }, + { + "text": "I love Cheska Hull from Made in Chelsea, I love all those Chelsea lot.", + "length": 70 + }, + { + "text": "giveaways, as well as providing weekly style advice, creating online .", + "length": 70 + }, + { + "text": "flattering maxis, making it an accessible collection for women of all .", + "length": 71 + }, + { + "text": "vital part of her new health plan and she trains four or five times a .", + "length": 71 + }, + { + "text": "from £42 to £55, and her dresses also include a trendy animal print .", + "length": 71 + }, + { + "text": "it in record time, dropping three stone in three months - and a total .", + "length": 71 + }, + { + "text": "she saw unflattering photographs of herself in a bikini on the beach, .", + "length": 71 + }, + { + "text": "follows in the footsteps of Made In Chelsea star Louise Thompson, whose .", + "length": 73 + }, + { + "text": "It's always going to be my life now because I'm one of those people who .", + "length": 73 + }, + { + "text": "only did the TV personality lose a staggering amount of weight, she did .", + "length": 73 + }, + { + "text": "week, with high intensity training serving as the most effective method .", + "length": 73 + }, + { + "text": "bling embellishments, to a vibrant green number with a revealing cut out .", + "length": 74 + }, + { + "text": "interacting with fans and Goddiva customers with online competitions and .", + "length": 74 + }, + { + "text": "two collections for the online retailer were a sell-out success, and the .", + "length": 74 + }, + { + "text": "Now that I've lost weight I want to design some midriff baring dresses,' she said.", + "length": 82 + }, + { + "text": "happening to me, I am just a normal girl who goes home and cleans up poo on a farm, I .", + "length": 87 + }, + { + "text": "I've got to do it because I'm not 
just doing it for me anymore, I'm doing it for everyone.", + "length": 90 + }, + { + "text": "' Girly glam: Prices range from £42-£55 and her dresses are ideal for a girl's night out .", + "length": 92 + }, + { + "text": "because I'm not just doing it for me anymore, I'm doing it for everyone' 'I felt guilty at first.", + "length": 97 + }, + { + "text": "She is also a big fan of Davina McCall, who she became close with during her stint on Big Brother.", + "length": 98 + }, + { + "text": "A healthy new diet helped her slim down from 16st 7lb and a size 18 to 10st 2lb and a svelte size eight.", + "length": 104 + }, + { + "text": "'It's such a fickle world that we live in, the offers have been rolling in since I lost weight,' she said.", + "length": 106 + }, + { + "text": "While she is relishing all the opportunities that come her way, the Bristolian is adamant that she won't get .", + "length": 110 + }, + { + "text": "always has to watch what they eat and make sure I train it off; I'm not built petite, I'm built bigger,' she said.", + "length": 114 + }, + { + "text": "And now Big Brother winner and Bristol resident Josie Gibson is flaunting the body she has worked so hard to achieve.", + "length": 117 + }, + { + "text": "Big loser: Josie went from 16st 7lb and a size 18 to 10st 2lb and a svelte size 8, and what better way to show it off?", + "length": 118 + }, + { + "text": "The team formed in mid-2012, before Josie's remarkable weight loss, when the label was keen to create a collection for .", + "length": 120 + }, + { + "text": "'I thought Pete Burns was going to be vile but he's actually the loveliest bloke and John McCririck is a really nice guy.", + "length": 121 + }, + { + "text": "Prints and minis: The Josie Gibson for Goddiva collection combines Josie's love of fun prints, bodycon dresses and flattering maxis .", + "length": 133 + }, + { + "text": "' Ever since debuting her new body, Josie has been hot property and been snapped up to pose for a number of 
magazines and lingerie shoots.", + "length": 138 + }, + { + "text": "'I was so worried about putting the weight back on I didn't buy any new clothes but I've put all my old clothes on eBay now,' she admitted.", + "length": 139 + }, + { + "text": "' Bubbly: Josie was chosen to turn her hand to designing because of her fun-loving personality, which they wanted to echo through her clothes .", + "length": 143 + }, + { + "text": "She has become the inspiration for women everywhere wanting to lose weight after shrinking from a size 20 to a svelte size eight in just one year.", + "length": 146 + }, + { + "text": "But the super slimmer has revealed that, although she is relishing her slim new body, she is absolutely terrified about putting the weight back on.", + "length": 147 + }, + { + "text": "And while she is currently occupied with promoting her new collection and sticking to her healthy lifestyle, the star has big plans for the future.", + "length": 147 + }, + { + "text": "Showing off her hard work: Josie Gibson, who shot to fame on Big Brother, is showcasing her svelte new figure in her debut clothing line for Goddiva .", + "length": 150 + }, + { + "text": "Health kick: After seeing photos of herself on the beach Josie adopted a super healthy new lifestyle and now loves eating all the right kinds of food .", + "length": 151 + }, + { + "text": "Before and after: Josie shed an amazing amount of weight after seeing photographs of herself in a bikini and says she is terrified of going back there .", + "length": 152 + }, + { + "text": "She has since knocked Davina off the top of the exercise video charts with her debut fitness DVD, 30 Second Slim, but she is assured that the presenter would be proud of her.", + "length": 174 + }, + { + "text": "And to celebrate her hot new body, Josie has joined forces with online fashion retailer Goddiva to create the Josie Gibson for Goddiva collection, a range of glamorous evening-wear.", + "length": 181 + }, + { + "text": "Following 
the launch of her first fitness DVD (and several steamy lingerie shoots), the bubbly blonde has turned her hand to designing, releasing her debut collection of clothes for online fashion retailer Goddiva....", + "length": 217 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4217079281806946 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:34.152454847Z", + "first_section_created": "2025-12-23T09:35:34.152743659Z", + "last_section_published": "2025-12-23T09:35:34.153074273Z", + "all_results_received": "2025-12-23T09:35:34.25988091Z", + "output_generated": "2025-12-23T09:35:34.260099119Z", + "total_processing_time_ms": 107, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 106, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:34.152743659Z", + "publish_time": "2025-12-23T09:35:34.152963668Z", + "first_worker_start": "2025-12-23T09:35:34.15349559Z", + "last_worker_end": "2025-12-23T09:35:34.258924Z", + "total_journey_time_ms": 106, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:34.153476889Z", + "start_time": "2025-12-23T09:35:34.153528391Z", + "end_time": "2025-12-23T09:35:34.153630796Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:34.153686Z", + "start_time": "2025-12-23T09:35:34.153822Z", + "end_time": "2025-12-23T09:35:34.258924Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 105 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:34.153425487Z", + "start_time": "2025-12-23T09:35:34.15349559Z", + "end_time": "2025-12-23T09:35:34.153595494Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:34.15349489Z", + "start_time": 
"2025-12-23T09:35:34.153546492Z", + "end_time": "2025-12-23T09:35:34.153585794Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:34.152993069Z", + "publish_time": "2025-12-23T09:35:34.153074273Z", + "first_worker_start": "2025-12-23T09:35:34.153524391Z", + "last_worker_end": "2025-12-23T09:35:34.238466Z", + "total_journey_time_ms": 85, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:34.153473889Z", + "start_time": "2025-12-23T09:35:34.153524391Z", + "end_time": "2025-12-23T09:35:34.153557893Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:34.15374Z", + "start_time": "2025-12-23T09:35:34.153888Z", + "end_time": "2025-12-23T09:35:34.238466Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 84 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:34.153521091Z", + "start_time": "2025-12-23T09:35:34.153569693Z", + "end_time": "2025-12-23T09:35:34.153617695Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:34.153507791Z", + "start_time": "2025-12-23T09:35:34.153545292Z", + "end_time": "2025-12-23T09:35:34.153570193Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 189, + "min_processing_ms": 84, + "max_processing_ms": 105, + "avg_processing_ms": 94, + "median_processing_ms": 105, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3370, + "slowest_section_id": 0, + "slowest_section_time_ms": 106 + } +} diff --git a/data/output/00617c5ed58860ccfadf4095141dacc18c6ac832.json b/data/output/00617c5ed58860ccfadf4095141dacc18c6ac832.json new file mode 100644 index 0000000..0a54c76 --- /dev/null +++ b/data/output/00617c5ed58860ccfadf4095141dacc18c6ac832.json @@ -0,0 +1,680 @@ +{ + "file_name": "00617c5ed58860ccfadf4095141dacc18c6ac832.txt", + "total_words": 1605, + "top_n_words": [ + { + "word": "the", + "count": 76 + }, + { + "word": "a", + "count": 53 + }, + { + "word": "and", + "count": 49 + }, + { + "word": "to", + "count": 37 + }, + { + "word": "he", + "count": 36 + }, + { + "word": "his", + "count": 34 + }, + { + "word": "in", + "count": 34 + }, + { + "word": "widstrand", + "count": 32 + }, + { + "word": "was", + "count": 28 + }, + { + "word": "of", + "count": 25 + } + ], + "sorted_sentences": [ + { + "text": "A .", + "length": 3 + }, + { + "text": "...", + "length": 3 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "'I .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "forward.", + "length": 8 + }, + { + "text": "pockets.", + "length": 8 + }, + { + "text": "He was .", + "length": 8 + }, + { + "text": "speaking.", + "length": 9 + }, + { + "text": "Wearing .", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { 
+ "text": "Hopefully .", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "return home.", + "length": 12 + }, + { + "text": "on August 31.", + "length": 13 + }, + { + "text": "I lashed out.", + "length": 13 + }, + { + "text": "'My dad was right.", + "length": 18 + }, + { + "text": "I wish them the best.", + "length": 21 + }, + { + "text": "I eat less junk food.", + "length": 21 + }, + { + "text": "I was a young kid once.", + "length": 23 + }, + { + "text": "He also wears a helmet .", + "length": 24 + }, + { + "text": "will be able to move home.", + "length": 26 + }, + { + "text": "Progress can stop tomorrow.", + "length": 27 + }, + { + "text": "We might not know for years.", + "length": 28 + }, + { + "text": "05:22 EST, 24 October 2013 .", + "length": 28 + }, + { + "text": "01:02 EST, 24 October 2013 .", + "length": 28 + }, + { + "text": "Every day, I see something, .", + "length": 29 + }, + { + "text": "'Moving on to the next thing.", + "length": 29 + }, + { + "text": "'I have people praying for me.", + "length": 30 + }, + { + "text": "He suffered 'potentially fatal .", + "length": 32 + }, + { + "text": "'Moving somewhere,' Widstrand said.", + "length": 35 + }, + { + "text": "'It was just young kids lashing out.", + "length": 36 + }, + { + "text": "He's hoping to be home by Christmas.", + "length": 36 + }, + { + "text": "It might not stop two years from now.", + "length": 37 + }, + { + "text": "Sara Malm, Alex Greig and Helen Pow .", + "length": 37 + }, + { + "text": "'It's been trying at times,' he said.", + "length": 37 + }, + { + "text": "get past them, witnesses told police.", + "length": 37 + }, + { + "text": "I'm healthier, so that has been nice.", + "length": 37 + }, + { + "text": "The 27-year-old was walking home in .", + "length": 37 + }, + { + "text": "I have to go the extra mile,' he said.", + "length": 38 + }, + { + "text": "Witnesses reported seeing blood coming .", + "length": 40 + }, + { + "text": "'They're talking 
December first,' said .", + "length": 40 + }, + { + "text": "He was hit in the head with a 'can in a .", + "length": 41 + }, + { + "text": "' He added: 'I'm indifferent towards them.", + "length": 42 + }, + { + "text": "He was placed in a medically-induced coma .", + "length": 43 + }, + { + "text": "escalated into a bust-up between local gangs.", + "length": 45 + }, + { + "text": "whether it's in his eyes or something he does.", + "length": 46 + }, + { + "text": "'I quit smoking since I've been in the hospital.", + "length": 48 + }, + { + "text": "'It was longer than the next day,' said his father.", + "length": 51 + }, + { + "text": "'To me it seemed like the next day,' Widstrand said.", + "length": 52 + }, + { + "text": "has faced in the two months since the beating occurred.", + "length": 55 + }, + { + "text": "they will at least answer to that,' Peter Widstrand added.", + "length": 58 + }, + { + "text": "can't imagine why someone would do this to another person.", + "length": 58 + }, + { + "text": "He said there are more builders than there are tearers down.", + "length": 60 + }, + { + "text": "The trial for Issac Maiden, 19, pictured, will start Monday .", + "length": 61 + }, + { + "text": "third removed Widstrand's trousers and the group searched his .", + "length": 63 + }, + { + "text": "sock', stomped on, had his pants ripped off him and was robbed.", + "length": 63 + }, + { + "text": "and speech therapy as he recovers from his serious brain injury.", + "length": 64 + }, + { + "text": "'I didn't even know I got beaten up till the next day,' he said.", + "length": 64 + }, + { + "text": "placed in a medically-induced coma when he was taken to hospital.", + "length": 65 + }, + { + "text": "'That was just a freak accident,' Widstrand told the Star Tribune.", + "length": 66 + }, + { + "text": "He thanks his friends, family and the community for their support.", + "length": 66 + }, + { + "text": "from his head after which a second man stomped on his head while a 
.", + "length": 68 + }, + { + "text": "'Seeing all the support - I just had a birthday last month, and it .", + "length": 68 + }, + { + "text": "' The young man had moved to an apartment in the Minnesota city two .", + "length": 69 + }, + { + "text": "police station and turned the violence on Widstrand when he tried to .", + "length": 70 + }, + { + "text": "local newspaper reported that the fight had started at a party nearby .", + "length": 71 + }, + { + "text": "Institute where he's slowly re-learning basic skills like walking and .", + "length": 71 + }, + { + "text": "'But it's also been a learning experience for me,' Widstrand told KSTP.", + "length": 71 + }, + { + "text": "and a group of girls had taken it to the street after which the brawl .", + "length": 71 + }, + { + "text": "months before the attack and was working for a local community channel.", + "length": 71 + }, + { + "text": "He has been undergoing intensive rehabilitation at Courage Kenny Rehab .", + "length": 72 + }, + { + "text": "has been so cool, getting all the cards,' said Widstrand, who turned 27 .", + "length": 73 + }, + { + "text": "Widstrand's father Peter Widstrand, who says he doesn't know if his son .", + "length": 73 + }, + { + "text": "a helmet to protect his head, Widstrand spoke about the difficulties he .", + "length": 73 + }, + { + "text": "large group was fighting on a street located mere blocks from the local .", + "length": 73 + }, + { + "text": "' Widstrand said he has no recollection of the night his life changed forever.", + "length": 78 + }, + { + "text": "Tips from witnesses led to five arrests: 19-year-old Maiden and four juveniles.", + "length": 79 + }, + { + "text": "Trials for the underage teens, two of whom are just 15 years old, are underway.", + "length": 79 + }, + { + "text": "Peter Widstrand told The Pioneer Press: 'It seems every day we see some steps .", + "length": 80 + }, + { + "text": "'[But] we're still in the place where we don't know if he'll return to 
work or .", + "length": 80 + }, + { + "text": "Suspect: Five people, one adult and four juveniles, have been charged in the assault.", + "length": 85 + }, + { + "text": "' Five people, one adult and four juveniles, have been charged in the assault on Widstrand.", + "length": 91 + }, + { + "text": "He will remain in hospital in the coming months, with a tentative release date of December 1.", + "length": 93 + }, + { + "text": "A pair of 15-year-olds are still awaiting a judge to decide if they will stand trial as adults.", + "length": 95 + }, + { + "text": "brain swelling' and 'permanent and protracted loss of brain function' as a result of the beating.", + "length": 97 + }, + { + "text": "'He was out for a walk and decided to go through this group and was assaulted and knocked unconscious.", + "length": 102 + }, + { + "text": "Widstrand spent several days in a medically induced coma and suffered potentially fatal brain swelling.", + "length": 103 + }, + { + "text": "Cindarion Butler, 16, was certified as an adult last week and will appear at a hearing later this month.", + "length": 104 + }, + { + "text": "But in an interview earlier this month, the St Paul resident said he bears no hatred toward his assailants.", + "length": 107 + }, + { + "text": "The 27-year-old victim has undergone multiple surgeries and still has months of physical therapy ahead of him.", + "length": 110 + }, + { + "text": "his neighborhood on St Paul's East Side when he was attacked by a group of 30 to 50 juveniles and young adults.", + "length": 111 + }, + { + "text": "Despite the hardships he has faced, Widstrand has been overwhelmed by the love and support of his family and community.", + "length": 119 + }, + { + "text": "According to the police report, Widstrand was hit over the head with a 'can in a sock' which knocked him to the ground.", + "length": 119 + }, + { + "text": "A third 15-year-old suspect will learn whether his case will be handled in juvenile court or criminal court in 
December.", + "length": 120 + }, + { + "text": "The world is a good place if you just give it a chance,” Widstrand told the Tribune from his room at the rehab center.", + "length": 120 + }, + { + "text": "' Widstrand has already undergone two surgeries for brain swelling and skull fractures and will face a third in November.", + "length": 121 + }, + { + "text": "The young man is due to undergo more surgery in November, this time to have the blone flaps on his head put back into place.", + "length": 124 + }, + { + "text": "Strong: Widstrand has already undergone two surgeries for brain swelling and skull fractures and will face a third in November .", + "length": 128 + }, + { + "text": "Maiden has been charged with first-degree assault, first-degree aggravated robbery and crime committed for the benefit of a gang.", + "length": 129 + }, + { + "text": "Speaking at a press conference in August, Peter Widstrand said his son had never ‘expressed any concerns' about living on the East Side.", + "length": 138 + }, + { + "text": "Crime scene: Mr Widstrand was walking through his local neighbourhood on St Paul's East Side when he was brutally assaulted, robbed and stripped .", + "length": 146 + }, + { + "text": "'An innocent man was walking down the street and brutally attacked by a group of youth,' police spokesman Sergeant Paul Paulos, told Pioneer Press.", + "length": 147 + }, + { + "text": "If he violates the conditions of the plea agreement, the 15-year-old would have to serve more than eight years in prison, The Star Tribune reported.", + "length": 148 + }, + { + "text": "A 15-year-old boy from Minnesota charged with beating a man into a coma over the summer has been sentenced to 18 to 24 months as part of a plea deal.", + "length": 149 + }, + { + "text": "Inspirational: Ray Widstrand, pictured, was beaten within an inch of his life but he has forgiven the five teens responsible for the shocking, unprovoked attack .", + "length": 162 + }, + { + "text": "But I am 
recovering and I'll be back soon enough, hopefully I can go back to work, back to my normal life, back to living on my own and back to business as usual.", + "length": 162 + }, + { + "text": "No memory: Widstrand, pictured before the life-changing attack, was hit in the head with a 'can in a sock', stomped on, had his pants ripped off him and was robbed.", + "length": 164 + }, + { + "text": "The teenager will spend up to two years in a secure juvenile facility and will remain under enhanced supervision until his 21st birthday, MyFox Twin Cities reported.", + "length": 165 + }, + { + "text": "Brutal beating: After Mr Widstrand was knocked over one person was seen stomping on his head after which a third stripped him of his trousers to search his pockets .", + "length": 165 + }, + { + "text": "The jury trial for Issac Maiden, 19, who faces first-degree assault and aggravated robbery charges as well as two counts of gang-related crimes, will start November 4.", + "length": 167 + }, + { + "text": "Speaking slowly and haltingly, Widstrand told a local news station that he has no recollection of the night heartless youths robbed and beat him to the brink of death.", + "length": 167 + }, + { + "text": "Harrowing experience: The 27-year-old, pictured this week, said he'd learned a lot from the harrowing experience, which left him in a wheelchair and unable to perform everyday tasks.", + "length": 182 + }, + { + "text": "Police found Widstrand lying on the ground, bleeding from his nose and mouth, his pants removed and shirt torn, according to a criminal complaint filed by the Ramsey County attorney's office.", + "length": 191 + }, + { + "text": "' Family support: Ray Widstrand's parents Linda and Peter spoke during a press conference alongside his sister Alice, expressing their shock and disbelief at the violent assault on their son and brother .", + "length": 204 + }, + { + "text": "The inspirational cable programming assistant said he'd learned a lot from the harrowing 
experience, which left him in a wheelchair and unable to perform basic tasks like shaving or using the bathroom alone.", + "length": 207 + }, + { + "text": "Earlier this month, soon after he began constructing sentences again following his severe brain injuries, Widstrand gave his first interview from his wheelchair at a fundraiser held in Minneapolis to help pay for his treatments.", + "length": 228 + }, + { + "text": "The minor, who has not been named because of his age, pleaded guilty this week to first-degree felony assault and agreed to testify against his alleged accomplices in the brutal August attack on 26-year-old Ray Widstrand in St Paul.", + "length": 232 + }, + { + "text": "On the night of August 4, then-26-year-old Widstrand was walking home in the vicinity of near Payne and Minnehaha avenues when he found himself in the middle of a terrifying gang confrontation and was kicked, stomped on and left for dead.", + "length": 238 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5229697227478027 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:34.653861376Z", + "first_section_created": "2025-12-23T09:35:34.654158989Z", + "last_section_published": "2025-12-23T09:35:34.654588007Z", + "all_results_received": "2025-12-23T09:35:34.776824185Z", + "output_generated": "2025-12-23T09:35:34.777077295Z", + "total_processing_time_ms": 123, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 122, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:34.654158989Z", + "publish_time": "2025-12-23T09:35:34.654458701Z", + "first_worker_start": "2025-12-23T09:35:34.654930521Z", + "last_worker_end": "2025-12-23T09:35:34.740997Z", + "total_journey_time_ms": 86, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:34.654858318Z", + "start_time": "2025-12-23T09:35:34.654930521Z", + 
"end_time": "2025-12-23T09:35:34.655006224Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:34.655184Z", + "start_time": "2025-12-23T09:35:34.655327Z", + "end_time": "2025-12-23T09:35:34.740997Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 85 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:34.654933121Z", + "start_time": "2025-12-23T09:35:34.654987123Z", + "end_time": "2025-12-23T09:35:34.655103128Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:34.65489992Z", + "start_time": "2025-12-23T09:35:34.654954922Z", + "end_time": "2025-12-23T09:35:34.654993223Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:34.654495103Z", + "publish_time": "2025-12-23T09:35:34.654588007Z", + "first_worker_start": "2025-12-23T09:35:34.654957122Z", + "last_worker_end": "2025-12-23T09:35:34.775925Z", + "total_journey_time_ms": 121, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:34.655059726Z", + "start_time": "2025-12-23T09:35:34.655138229Z", + "end_time": "2025-12-23T09:35:34.655223033Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:34.655242Z", + "start_time": "2025-12-23T09:35:34.655363Z", + "end_time": "2025-12-23T09:35:34.775925Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 120 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:34.654936921Z", + "start_time": "2025-12-23T09:35:34.654991623Z", + "end_time": "2025-12-23T09:35:34.655102228Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": 
"2025-12-23T09:35:34.654896119Z", + "start_time": "2025-12-23T09:35:34.654957122Z", + "end_time": "2025-12-23T09:35:34.654995524Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 205, + "min_processing_ms": 85, + "max_processing_ms": 120, + "avg_processing_ms": 102, + "median_processing_ms": 120, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4533, + "slowest_section_id": 1, + "slowest_section_time_ms": 121 + } +} diff --git a/data/output/00618e25f8475c67e5be3528022efa26bdd5258f.json b/data/output/00618e25f8475c67e5be3528022efa26bdd5258f.json new file mode 100644 index 0000000..188b033 --- /dev/null +++ b/data/output/00618e25f8475c67e5be3528022efa26bdd5258f.json @@ -0,0 +1,246 @@ +{ + "file_name": "00618e25f8475c67e5be3528022efa26bdd5258f.txt", + "total_words": 450, + "top_n_words": [ + { + "word": "the", + "count": 26 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "of", 
+ "count": 10 + }, + { + "word": "as", + "count": 6 + }, + { + "word": "cartoon", + "count": 6 + }, + { + "word": "european", + "count": 6 + }, + { + "word": "on", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "And he will make you vote.", + "length": 26 + }, + { + "text": "\"The latter was the intention.", + "length": 30 + }, + { + "text": "(CNN) -- It's a perennial problem.", + "length": 34 + }, + { + "text": "CNN's Kim Norgaard contributed to this report.", + "length": 46 + }, + { + "text": "May 25 is the day on which Danish voters will go to the polls.", + "length": 62 + }, + { + "text": "How do you persuade young, apathetic voters to go to the polls?", + "length": 63 + }, + { + "text": "The cartoon opens with two apparently politically disaffected young men.", + "length": 72 + }, + { + "text": "Enter \"Voteman\" -- Denmark's rather ill-judged and short-lived cartoon solution.", + "length": 80 + }, + { + "text": "\"Horrified by this, he decided he would dedicate his life to making everybody vote.", + "length": 83 + }, + { + "text": "So if you're not going to vote, don't try to run, don't try to hide, because he will find you.", + "length": 94 + }, + { + "text": "Less surprisingly perhaps, it has now been pulled and an apology made for its graphic sexual and violent content.", + "length": 113 + }, + { + "text": "Denmark's turnout for the last European elections in 2009 was close to 60%, well above the European average of 43%.", + "length": 115 + }, + { + "text": "\" The cartoon tells the story of how Voteman, as a young man, once forgot to vote in European Parliament elections.", + "length": 115 + }, + { + "text": "But I acknowledge that Parliament, as an institution, in future has to show more caution in what we put our name to.", + "length": 116 + }, + { + "text": "The cartoon is the unlikely creation of the Danish Parliament's EU information center, originally posted to its official YouTube page.", + "length": 134 + }, + { + "text": "\" The 
European Parliament elections, in which voters in each of the European Union's member states elect representatives to the body, are taking place across Europe next week.", + "length": 175 + }, + { + "text": "The scene switches to Voteman, a muscle-bound, stubble-chinned superhero, answering a call asking him to persuade voters to have their say in the upcoming European Parliament elections.", + "length": 185 + }, + { + "text": "An orgy of cartoon violence follows -- one of the original men is decapitated, while other would-be non-voters are punched, slapped and tossed through the windows of a polling station to vote.", + "length": 192 + }, + { + "text": "\"Reaction in social media is sharply divided between those who see this as unacceptably vulgar, and those who think it is tough but acceptable humor which brings attention to the vote on May 25.", + "length": 194 + }, + { + "text": "This, the narration says, taught him a painful lesson: \"No influence on climate regulations, agricultural subsidies, chemicals in toys -- and the amount of cinnamon allowed in his cinnamon buns.", + "length": 194 + }, + { + "text": "Mogens Lykketoft, speaker of the Danish Parliament, said in a statement on his Facebook page that many people had perceived the cartoon as \"more serious and offensive than it was intended, and see it as talking down to the youth.", + "length": 229 + }, + { + "text": "Naked, he leaps up from a bed surrounded by women apparently performing sex acts on him and -- having donned a leather waistcoat and trousers -- sets off from a Bond villain-esque island hideout on his mission, riding a pair of harnessed dolphins as waterskis.", + "length": 260 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8270972967147827 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:35.15561322Z", + "first_section_created": "2025-12-23T09:35:35.155921133Z", + "last_section_published": "2025-12-23T09:35:35.156065439Z", + 
"all_results_received": "2025-12-23T09:35:35.222832213Z", + "output_generated": "2025-12-23T09:35:35.222979819Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:35.155921133Z", + "publish_time": "2025-12-23T09:35:35.156065439Z", + "first_worker_start": "2025-12-23T09:35:35.156512258Z", + "last_worker_end": "2025-12-23T09:35:35.221935Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:35.156555159Z", + "start_time": "2025-12-23T09:35:35.156622062Z", + "end_time": "2025-12-23T09:35:35.156673564Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:35.156738Z", + "start_time": "2025-12-23T09:35:35.15688Z", + "end_time": "2025-12-23T09:35:35.221935Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:35.156496857Z", + "start_time": "2025-12-23T09:35:35.156553059Z", + "end_time": "2025-12-23T09:35:35.156603861Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:35.156454255Z", + "start_time": "2025-12-23T09:35:35.156512258Z", + "end_time": "2025-12-23T09:35:35.156531158Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + 
"max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2631, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/0061dddffc23dab36031475efdc679ab13be3a50.json b/data/output/0061dddffc23dab36031475efdc679ab13be3a50.json new file mode 100644 index 0000000..bd7f5ab --- /dev/null +++ b/data/output/0061dddffc23dab36031475efdc679ab13be3a50.json @@ -0,0 +1,496 @@ +{ + "file_name": "0061dddffc23dab36031475efdc679ab13be3a50.txt", + "total_words": 1266, + "top_n_words": [ + { + "word": "the", + "count": 76 + }, + { + "word": "sterling", + "count": 36 + }, + { + "word": "to", + "count": 36 + }, + { + "word": "a", + "count": 35 + }, + { + "word": "and", + "count": 35 + }, + { + "word": "said", + "count": 33 + }, + { + "word": "of", + "count": 30 + }, + { + "word": "s", + "count": 22 + }, + { + "word": "in", + "count": 19 + }, + { + "word": "court", + "count": 18 + } + ], + "sorted_sentences": [ + { + "text": "Dr.", + "length": 3 + }, + { + "text": "Dr.", + "length": 3 + }, + { + "text": "Mrs.", + "length": 4 + }, + { + "text": "\"Mr.", + "length": 4 + }, + { + "text": "\"Mr.", + "length": 4 + }, + { + "text": "But Mr.", + "length": 7 + }, + { + "text": "Meril S.", + "length": 8 + }, + { + "text": "Stephen L.", + "length": 10 + }, + { + "text": "speedy court date.", + "length": 18 + }, + { + 
"text": "\"It's unfortunate.", + "length": 18 + }, + { + "text": "Stiviano surfaced.", + "length": 18 + }, + { + "text": "Doctors' findings .", + "length": 19 + }, + { + "text": "Sterling as a trustee.", + "length": 22 + }, + { + "text": "\" September deadline .", + "length": 22 + }, + { + "text": "The meeting was canceled.", + "length": 25 + }, + { + "text": "So there's three doctors.", + "length": 25 + }, + { + "text": "So we are here because Mr.", + "length": 26 + }, + { + "text": "\"This is a complex business.", + "length": 28 + }, + { + "text": "We only need two,\" O'Donnell said.", + "length": 34 + }, + { + "text": "The commissioner fined Sterling $2.", + "length": 35 + }, + { + "text": "Platzer and Spar,\" court papers said.", + "length": 37 + }, + { + "text": "You have a $2 billion basketball team.", + "length": 38 + }, + { + "text": "Technically, a family trust owns the Clippers.", + "length": 46 + }, + { + "text": "5 million and banned him for life from the NBA.", + "length": 47 + }, + { + "text": "Another specialist in geriatric psychiatry, Dr.", + "length": 47 + }, + { + "text": "The recording included a series of racist comments.", + "length": 51 + }, + { + "text": "Read, \"confirmed the methodology and conclusions of Drs.", + "length": 56 + }, + { + "text": "In addition to damages, the lawsuit seeks a restraining order.", + "length": 62 + }, + { + "text": "\"From the onset, I did not want to sell the Los Angeles Clippers.", + "length": 65 + }, + { + "text": "In Shelly Sterling's court filings, NBA general counsel Richard W.", + "length": 66 + }, + { + "text": "I have worked for 33 years to build the team,\" Donald Sterling said.", + "length": 68 + }, + { + "text": "Sterling regrets having to go to court and publicly air this problem.", + "length": 69 + }, + { + "text": "\"If it does not go forward, the consequences are dire,\" Streisand said.", + "length": 71 + }, + { + "text": "\"And I don't believe any court is going to make a finding to the 
contrary.", + "length": 74 + }, + { + "text": "Ballmer is insisting that the court bless the transaction,\" Streisand said.", + "length": 75 + }, + { + "text": "Court papers say her husband shows early Alzheimer's or other brain disease.", + "length": 76 + }, + { + "text": "The probate court agreed to hold a four-day trial on the issue, beginning July 7.", + "length": 81 + }, + { + "text": "Sterling's conduct in reneging on the sale requires her to do so,\" O'Donnell said.", + "length": 82 + }, + { + "text": "Ballmer is not going to stick around for years, for this to wind through the courts.", + "length": 84 + }, + { + "text": "\"Bottom line, Donald Sterling does not want to sell the team,\" attorney Bobby Samini said.", + "length": 90 + }, + { + "text": "Donald Sterling is mentally sound, one of his lawyers, Maxwell Blecher, told CNN on Tuesday.", + "length": 92 + }, + { + "text": "Sterling's comments, first posted on TMZ, sparked outrage among NBA players, executives and fans.", + "length": 97 + }, + { + "text": "Sterling has been embroiled in controversy since a recording of a conversation with his friend V.", + "length": 97 + }, + { + "text": "In a statement Tuesday, Sterling said he has apologized for the remarks and his apology is sincere.", + "length": 99 + }, + { + "text": "Sterling says NBA officials are 'bullies,' 'hypocrites,' 'monsters' NBA commissioner: Sterling saga not over yet .", + "length": 114 + }, + { + "text": "\"The trust agreement provides that if two qualified physicians certify that he's mentally incapacitated, he's removed.", + "length": 118 + }, + { + "text": "Donald Sterling recently underwent a CT scan and a PET scan of his brain, according to Shelly Sterling's court filings.", + "length": 119 + }, + { + "text": "\" O'Donnell said his client sought an expedited hearing \"given the fact this is a very important transaction,\" he said.", + "length": 119 + }, + { + "text": "\" Under the trust agreement, if Donald Sterling became mentally 
incapacitated, he would be removed as a trustee, O'Donnell said.", + "length": 128 + }, + { + "text": "And the NBA has made it very clear that it will take over the team, and that is a consequence that is not going to benefit the Sterling family.", + "length": 143 + }, + { + "text": "But O'Donnell said that three physicians have certified that Donald Sterling lacks the mental capacity to function as a trustee of the complex trust.", + "length": 149 + }, + { + "text": "\"To date, I have not received Donald's written consent to the sale of the Clippers to (Ballmer) for $2 billion,\" Shelly Sterling said in court papers.", + "length": 150 + }, + { + "text": "In one document, Shelly Sterling said her husband \"has gone back and forth between opposing the sale and supporting the sale\" of the team since May 29.", + "length": 151 + }, + { + "text": "Sterling's legal maneuver comes as three physicians say her estranged husband, Donald, 80, is mentally incapacitated, said her attorney, Pierce O'Donnell.", + "length": 154 + }, + { + "text": "Sterling's lawsuit makes clear that he believes the NBA has no right to force such a sale, and the league was wrong in banning him for life and fining him.", + "length": 155 + }, + { + "text": "Buchanan said if the Sterlings don't sell the team by September 15, the league may sell the team or renew termination proceedings against the Clippers or both.", + "length": 159 + }, + { + "text": "Donald Sterling was unable to spell \"world\" backward, was unaware of the season of the year and initially had difficulty drawing a clock, Platzer said in court papers.", + "length": 167 + }, + { + "text": "The PET scan on May 16 providing findings \"consistent with a neurodementia of the Alzheimer's type,\" Platzer said in his certification of Donald Sterling's incapacity.", + "length": 167 + }, + { + "text": "Ballmer has insisted, as a provision of this deal, Shelly Sterling get approval from the court that she has the authority as the sole trustee 
based upon the removal of Mr.", + "length": 171 + }, + { + "text": "Donald Sterling opposes a sale of the team and says he gave his wife a purported letter only to negotiate with a buyer, not to formally sell the team, his attorney said Wednesday.", + "length": 179 + }, + { + "text": "Donald Sterling initially vowed to fight the sale and filed a lawsuit against the NBA, then said he was going along with the sale -- until Monday, when he again pulled his support.", + "length": 180 + }, + { + "text": "We also have a third distinguished doctor who's an expert in this field, mental capacity, who has reviewed the evidence, and supports the other doctors and agrees on that conclusion.", + "length": 182 + }, + { + "text": "\"It strikes me as totally incredible to argue that this man -- I talk to him every day -- is incapable of making decisions and is mentally incompetent,\" Blecher said on Tuesday afternoon.", + "length": 187 + }, + { + "text": "Platzer, a California neurologist, examined Donald Sterling on May 19 and found he \"is suffering from cognitive impairment secondary to primary dementia Alzheimer's disease,\" court papers said.", + "length": 193 + }, + { + "text": "\"I agree that the history and the findings are highly suspect as representing the slow emergence of progressive dementia, and specifically Alzheimer's disease,\" Read said in documents filed in court.", + "length": 199 + }, + { + "text": "\"In addition, the findings described are fully consistent with the general loss of brain tissue and, more specifically, with the pattern of impaired brain functions demonstrated by the PET scan of May 16, 2014.", + "length": 210 + }, + { + "text": "You have massive amount of 150 real estate holdings, and it requires a person to run the business who is competent, and the doctor -- three doctors -- have said that he lacks the mental capacity,\" O'Donnell said.", + "length": 212 + }, + { + "text": "Donald Sterling, the team's other co-owner, doesn't want to sell the 
team as the National Basketball Association demands; this week he called the league \"despicable monsters\" and \"a band of hypocrites and bullies.", + "length": 213 + }, + { + "text": "Joining Sterling and O'Donnell at the courthouse Wednesday was the attorney for former Microsoft CEO Steve Ballmer, with whom Shelly Sterling has reached an agreement to sell the franchise for a record $2 billion.", + "length": 213 + }, + { + "text": "Spar said in May 27 letter that Donald Sterling \"is substantially unable to manage his finances and resist fraud and undue influence, and is no longer competent to act as trustee of his trust,\" court documents said.", + "length": 215 + }, + { + "text": "Los Angeles (CNN) -- Los Angeles Clippers co-owner Shelly Sterling asked a Los Angeles probate court Wednesday to uphold her negotiated sale of the team for $2 billion despite her husband's objections, her attorney said.", + "length": 220 + }, + { + "text": "He also made inflammatory comments to CNN's \"Anderson Cooper 360\" about African-Americans, which the NBA had planned to use as part of its evidence against him in an owners' meeting where a vote would be taken on whether to terminate his ownership rights.", + "length": 255 + }, + { + "text": "James Edward Spar, a specialist in geriatric psychiatry who examined Donald Sterling on May 22, said Sterling suffers \"mild global cognitive impairment\" and \"the overall picture is consistent with early Alzheimer's disease, but could reflect other forms of brain disease,\" court papers said.", + "length": 291 + }, + { + "text": "Platzer said in a May 29 certification that Donald Sterling has \"an impairment of his level of attention, information processing, short term memory impairment and ability to modulate mood, emotional liability, and is at risk of making potentially serious errors of judgment,\" court papers said.", + "length": 294 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4508466422557831 + }, + 
"name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:35.656818442Z", + "first_section_created": "2025-12-23T09:35:35.657173856Z", + "last_section_published": "2025-12-23T09:35:35.657655776Z", + "all_results_received": "2025-12-23T09:35:35.780870795Z", + "output_generated": "2025-12-23T09:35:35.781085404Z", + "total_processing_time_ms": 124, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 123, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:35.657173856Z", + "publish_time": "2025-12-23T09:35:35.657480069Z", + "first_worker_start": "2025-12-23T09:35:35.658134096Z", + "last_worker_end": "2025-12-23T09:35:35.779868Z", + "total_journey_time_ms": 122, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:35.658095995Z", + "start_time": "2025-12-23T09:35:35.658176598Z", + "end_time": "2025-12-23T09:35:35.658278902Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:35.658507Z", + "start_time": "2025-12-23T09:35:35.658663Z", + "end_time": "2025-12-23T09:35:35.779868Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 121 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:35.658201499Z", + "start_time": "2025-12-23T09:35:35.658287903Z", + "end_time": "2025-12-23T09:35:35.658396807Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:35.658053793Z", + "start_time": "2025-12-23T09:35:35.658134096Z", + "end_time": "2025-12-23T09:35:35.658181898Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:35.657553172Z", + "publish_time": "2025-12-23T09:35:35.657655776Z", + "first_worker_start": "2025-12-23T09:35:35.658290803Z", + 
"last_worker_end": "2025-12-23T09:35:35.757366Z", + "total_journey_time_ms": 99, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:35.658677519Z", + "start_time": "2025-12-23T09:35:35.658728221Z", + "end_time": "2025-12-23T09:35:35.658778723Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:35.658579Z", + "start_time": "2025-12-23T09:35:35.658764Z", + "end_time": "2025-12-23T09:35:35.757366Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 98 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:35.658389907Z", + "start_time": "2025-12-23T09:35:35.658448809Z", + "end_time": "2025-12-23T09:35:35.658512912Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:35.6582372Z", + "start_time": "2025-12-23T09:35:35.658290803Z", + "end_time": "2025-12-23T09:35:35.658322104Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 219, + "min_processing_ms": 98, + "max_processing_ms": 121, + "avg_processing_ms": 109, + "median_processing_ms": 121, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + 
"total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3777, + "slowest_section_id": 0, + "slowest_section_time_ms": 122 + } +} diff --git a/data/output/0061e7c11acc1f8f4c4f423db11f38df39b748b7.json b/data/output/0061e7c11acc1f8f4c4f423db11f38df39b748b7.json new file mode 100644 index 0000000..c3d3dcb --- /dev/null +++ b/data/output/0061e7c11acc1f8f4c4f423db11f38df39b748b7.json @@ -0,0 +1,230 @@ +{ + "file_name": "0061e7c11acc1f8f4c4f423db11f38df39b748b7.txt", + "total_words": 275, + "top_n_words": [ + { + "word": "the", + "count": 15 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "of", + "count": 7 + }, + { + "word": "rage", + "count": 5 + }, + { + "word": "com", + "count": 4 + }, + { + "word": "for", + "count": 4 + }, + { + "word": "machine", + "count": 4 + }, + { + "word": "rollingstone", + "count": 4 + }, + { + "word": "s", + "count": 4 + }, + { + "word": "against", + "count": 3 + } + ], + "sorted_sentences": [ + { + "text": "com .", + "length": 5 + }, + { + "text": "RollingStone.", + "length": 13 + }, + { + "text": "(RollingStone.", + "length": 14 + }, + { + "text": "\") RollingStone.", + "length": 16 + }, + { + "text": "Copyright © 2011 Rolling Stone.", + "length": 32 + }, + { + "text": "See the full story at RollingStone.", + "length": 35 + }, + { + "text": "com: 500 greatest albums of all time .", + "length": 38 + }, + { + "text": "(See the liquid note-bending on \"Township Rebellion.", + "length": 52 + }, + { + "text": "Rage was machine-like, yes, but built to change worlds.", + "length": 55 + }, + { + "text": "com: Rage Against the Machine box set marks 20th anniversary of first LP .", + "length": 74 + }, + { + "text": "com) -- Rage Against the Machine's 1992 debut is a grenade that keeps exploding.", + "length": 80 + }, + { + "text": 
"Among '90s albums, only \"Nevermind\" and \"The Chronic\" rival it for cultural impact.", + "length": 83 + }, + { + "text": "Like any good revolutionary sect, the band members weren't without their contradictions and tensions.", + "length": 101 + }, + { + "text": "Rage made hip-hop-tinged funk metal the new rebel music, taking over the alienation beat from grunge slackers and making Marxist sloganeering seem badass.", + "length": 154 + }, + { + "text": "The rap appropriation has lost the force of novelty, of course, but blaming Rage Against the Machine for Fred Durst is like blaming Abraham Lincoln for John Boehner.", + "length": 165 + }, + { + "text": "De la Rocha's throat-scraping eruptions about suicide (the fate of an outcast in \"Settle for Nothing\") and bullets in the head feel as primal as any lefty rock -- and maybe more so, heard from inside Morello's palace of sound.", + "length": 226 + }, + { + "text": "Remastered to museum-clean standards, the reissued album comes with DVDs of live shows and music videos, plus demos that prove just how down and detailed the group had every song (even if Morello still couldn't resist changing solos).", + "length": 234 + }, + { + "text": "Zack de la Rocha's blocky, academically aspirational rhymes preached leftist revolution, and guitarist and sonic architect Tom Morello practiced an almost authoritarian control and extreme technical precision as he mimicked sampling, sent down thunderous power chords and, occasionally, indulged in almost New Age-y solos.", + "length": 322 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.43408718705177307 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:36.158351576Z", + "first_section_created": "2025-12-23T09:35:36.158705691Z", + "last_section_published": "2025-12-23T09:35:36.158874498Z", + "all_results_received": "2025-12-23T09:35:36.227819862Z", + "output_generated": "2025-12-23T09:35:36.22801407Z", + 
"total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:36.158705691Z", + "publish_time": "2025-12-23T09:35:36.158874498Z", + "first_worker_start": "2025-12-23T09:35:36.15940672Z", + "last_worker_end": "2025-12-23T09:35:36.226953Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:36.15940792Z", + "start_time": "2025-12-23T09:35:36.159466223Z", + "end_time": "2025-12-23T09:35:36.159502224Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:36.159566Z", + "start_time": "2025-12-23T09:35:36.159719Z", + "end_time": "2025-12-23T09:35:36.226953Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:36.159360418Z", + "start_time": "2025-12-23T09:35:36.159418921Z", + "end_time": "2025-12-23T09:35:36.159461022Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:36.159351718Z", + "start_time": "2025-12-23T09:35:36.15940672Z", + "end_time": "2025-12-23T09:35:36.159426821Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1711, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/00632057d189597441eac61427d63d8e16b6a09c.json b/data/output/00632057d189597441eac61427d63d8e16b6a09c.json new file mode 100644 index 0000000..dd2722d --- /dev/null +++ b/data/output/00632057d189597441eac61427d63d8e16b6a09c.json @@ -0,0 +1,560 @@ +{ + "file_name": "00632057d189597441eac61427d63d8e16b6a09c.txt", + "total_words": 1783, + "top_n_words": [ + { + "word": "the", + "count": 102 + }, + { + "word": "to", + "count": 46 + }, + { + "word": "and", + "count": 44 + }, + { + "word": "of", + "count": 41 + }, + { + "word": "you", + "count": 38 + }, + { + "word": "i", + "count": 32 + }, + { + "word": "long", + "count": 29 + }, + { + "word": "that", + "count": 26 + }, + { + "word": "a", + "count": 25 + }, + { + "word": "in", + "count": 25 + } + ], + "sorted_sentences": [ + { + "text": "9 per cent.", + "length": 11 + }, + { + "text": "' Van Gaal asked.", + "length": 17 + }, + { + "text": "'What do you think?", + "length": 19 + }, + { + "text": "The pamphlet said 71.", + "length": 21 + }, + { + "text": "But I have seen some facts.", + "length": 27 + }, + { + "text": "Eight of them were successful.", + "length": 30 + }, + { + "text": "Van Gaal claims United made 49.", + "length": 31 + }, + { + "text": "'But I want to talk about facts.", + "length": 32 + }, + { + "text": "Here are some of the 
best bits...", + "length": 33 + }, + { + "text": "'And about his behaviour with referees.", + "length": 39 + }, + { + "text": "I am not playing mind games, just facts.", + "length": 40 + }, + { + "text": "It is not so difficult also to read that.", + "length": 41 + }, + { + "text": "It is not so difficult also to read that.", + "length": 41 + }, + { + "text": "The blue arrows indicate a successful pass .", + "length": 44 + }, + { + "text": "Yeah, long balls, in the width, to switch the play.", + "length": 51 + }, + { + "text": "'Yeah, long balls, in the width, to switch the play.", + "length": 52 + }, + { + "text": "There are things that everyone can see every single week.", + "length": 57 + }, + { + "text": "'You can always improve, you can never have a perfect match.", + "length": 60 + }, + { + "text": "In the end it’s paid off for them so you can’t knock it.", + "length": 60 + }, + { + "text": "A ball to the forward striker is mainly caused long-ball play.", + "length": 62 + }, + { + "text": "A ball to the forward striker is mainly caused by long-ball play.", + "length": 65 + }, + { + "text": "Van Gaal's overall tactics have come in for criticism this season.", + "length": 66 + }, + { + "text": "'We need to know that I am talking about facts, not my impression.", + "length": 66 + }, + { + "text": "When you have 60 per cent ball possession you cannot play long balls.", + "length": 69 + }, + { + "text": "'When you have 60 per cent ball possession you cannot play long balls.", + "length": 70 + }, + { + "text": "We have talked about already from my first day, but we have to improve.", + "length": 71 + }, + { + "text": "'You have to see the data and put the data in the right context I believe.", + "length": 74 + }, + { + "text": "The Hammers attempted 200 passes, according to the figures, and United 343.", + "length": 75 + }, + { + "text": "9 per cent of 'long passes forward' in comparison to West Ham's 71 per cent .", + "length": 77 + }, + { + "text": "It's not how 
you normally see United play, but it got them a goal in the end.", + "length": 77 + }, + { + "text": "about me, or the game, or the players and now you say that you have no opinion.", + "length": 79 + }, + { + "text": "'I think we have played attractive games and less attractive games,' Van Gaal said.", + "length": 83 + }, + { + "text": "Van Gaal looks unimpressed as he tells a packed press conference about his tactics .", + "length": 84 + }, + { + "text": "'I am happy with the results, but I also think we can improve which is also the process.", + "length": 88 + }, + { + "text": "' Van Gaal's delivery was calm and he only came to the brink of losing his patience once.", + "length": 89 + }, + { + "text": "Shotbolt looks less than impressed as the United manager continues to defend his tactics .", + "length": 90 + }, + { + "text": "The Southampton manager he knows how Mr Ferguson works and how he works, he was very clear.", + "length": 91 + }, + { + "text": "When you have 0 per cent ball possession do you think that you can do that with long balls?", + "length": 91 + }, + { + "text": "Van Gaal talks regularly about the fact that he wants his teams to play attractive football.", + "length": 92 + }, + { + "text": "West Ham manager Allardyce was critical of Van Gaal's tactics following Sunday's encounter .", + "length": 92 + }, + { + "text": "'When you have 60 per cent ball possession do you think that you can do that with long balls?", + "length": 93 + }, + { + "text": "Van Gaal poses with Paddy McNair after the defender agreed an extension to his current contract .", + "length": 97 + }, + { + "text": "The 63-year-old hands the pages of his dossier to journalists during Tuesday's press conference .", + "length": 97 + }, + { + "text": "Cheikhou Kouyate opened the scoring for West Ham in their Premier League match against Man United .", + "length": 99 + }, + { + "text": "I want to be clear, I do not want to play mind games too early, although they seem to want to 
start.", + "length": 100 + }, + { + "text": "Van Gaal freely admits that Man United played more long balls when Maroruane Fellaini was introduced .", + "length": 102 + }, + { + "text": "Diagrams in the pamphlet claim to show United's passes went sideways or diagonally, rather than forward.", + "length": 104 + }, + { + "text": "United's change of tack came after 6ft 4in Fellaini was brought on in the second half, Van Gaal claimed .", + "length": 106 + }, + { + "text": "' Rafa Benitez infamously lost his cool at Sir Alex Ferguson during a press conference rant back in 2009 .", + "length": 106 + }, + { + "text": "West Ham made just 11 of their 32 'long and forward' passes during the match, according to boss Van Gaal .", + "length": 106 + }, + { + "text": "This page suggests United played 12 long passes after 76 minutes of their 1-1 draw at Upton Park on Sunday.", + "length": 107 + }, + { + "text": "Daley Blind scores a late equaliser for Man United at Upton Park to rescue a 1-1 draw for Van Gaal's side .", + "length": 107 + }, + { + "text": "Louis van Gaal brandishes part of his dossier as he attempts to prove that United are not a long-ball side .", + "length": 108 + }, + { + "text": "Van Gaal, pictured leaving the Upton Park pitch at half-time, flanked by Ryan Giggs and Albert Stuivenberg .", + "length": 108 + }, + { + "text": "Speaking after the match, Hammers boss Allardyce said:'’In the end we couldn’t cope with Long-ball United.", + "length": 110 + }, + { + "text": "1 per cent of West Ham's 'long passes' - of over 25 metres - went forward whereas United's percentage was just 49.", + "length": 114 + }, + { + "text": "'You might just criticise Louis van Gaal for playing long balls as much as I’m sometimes criticised for being direct.", + "length": 119 + }, + { + "text": "This sheet suggests United had a 56 per cent success rate of their 41 'long and forward' passes during the match at West Ham.", + "length": 125 + }, + { + "text": "' 'All managers need to know is 
that only Mr Ferguson can talk about the fixtures, can talk about referees and nothing happens.", + "length": 127 + }, + { + "text": "You have to look at the data and then you will see that we did play long balls, but long balls wide, rather than to the striker.", + "length": 128 + }, + { + "text": "You have to look at the data and then you will see that we did play long balls, but long balls wide, rather than to the striker.", + "length": 128 + }, + { + "text": "In 2009, Benitez erupted during a press conference and attacked former United boss Ferguson with some similarly pre-planned 'facts'.", + "length": 132 + }, + { + "text": "United may be fourth in the Barclays Premier League, but the team lacks the energy, dynamism and flair of the Sir Alex Ferguson era.", + "length": 132 + }, + { + "text": "' United's press officer, Karen Shotbolt, reacts as Van Gaal asks her to pass his dossier over to journalists at the press conference .", + "length": 135 + }, + { + "text": "Benitez said: 'I was surprised by what has been said, but maybe they [Manchester United] are nervous because we are at the top of the table.", + "length": 140 + }, + { + "text": "' After reeling off the statistics, Van Gaal rose from his chair and handed the pamphlet to the reporter who asked the question about Allardyce.", + "length": 144 + }, + { + "text": "So again, they did it 71 and we 49, and I give it you, you can copy it and then maybe you can go to Big Sam and he will get a good interpretation.", + "length": 146 + }, + { + "text": "When a colleague of mine is saying this kind of thing then, yeah, you have to see the data and you have to put the data in the right context I believe.", + "length": 151 + }, + { + "text": "Then, of course, with the quality of Fellaini we played more forward balls and we scored from that, so I think it was a very good decision of the manager.", + "length": 154 + }, + { + "text": "'Then, of course, with the quality of Fellaini we played more forward balls and we 
scored from that, so I think it was a very good decision of the manager.", + "length": 155 + }, + { + "text": "Manchester United manager Louis van Gaal has hit back at Sam Allardyce's claim that his team are a long-ball side - and insists he has the facts to prove it.", + "length": 157 + }, + { + "text": "But, when you see overall the long ball, and what is the percentage of that, then West Ham have played 71 per cent of the long balls to the forwards and we 49.", + "length": 159 + }, + { + "text": "So again, they did it 71 (per cent) and we 49, and I give it to you, you can copy it and then maybe you can go to Big Sam and he will get a good interpretation.", + "length": 160 + }, + { + "text": "'But, when you see overall the long ball, and what is the percentage of that, then West Ham have played 71 per cent of the long balls to the forwards and we 49.", + "length": 160 + }, + { + "text": "On 1 November, they played Hull and Mr Ferguson had a two-match touchline ban and a £10,000 fine after confronting Mike Dean, the referee, for improper conduct.", + "length": 161 + }, + { + "text": "The whole scene was a slightly bizarre episode that evoked memories of Rafa Benitez's press conference rant, directed at Sir Alex Ferguson, over referees in 2009.", + "length": 162 + }, + { + "text": "Because I expected this question, I have made an interpretation of the data for this game and then I have to say that it is not a good interpretation from Big Sam.", + "length": 163 + }, + { + "text": "’ The Dutchman however, insisted his team only started launching balls forward after 6ft 4in Marouane Fellaini came on in the 76th minute of the draw in east London.", + "length": 167 + }, + { + "text": "He was clearly, therefore, annoyed at Allardyce's comments and he had the pamphlet on his desk waiting when he fielded the first question about the West Ham boss' jibe.", + "length": 168 + }, + { + "text": "I am sorry, but we are playing ball possession play and after 70 minutes we did not 
succeed, in spite of many chances in the second half, then I changed my playing style.", + "length": 170 + }, + { + "text": "The former Barcelona boss insisted his team only started launching balls forward after 6ft 4in Marouane Fellaini came on in the 76th minute of the draw against West Ham .", + "length": 170 + }, + { + "text": "The United boss then asked them to analyse the last three pages, which contained diagrams which contained coloured arrows pointing in the direction of both team's passes.", + "length": 170 + }, + { + "text": "'I am sorry, but we are playing ball possession play and after 70 minutes we did not succeed, in spite of many chances in the second half, then I changed my playing style.", + "length": 171 + }, + { + "text": "Van Gaal was clearly annoyed at Sam Allardyce's comments and he had the pamphlet on his desk waiting when he fielded the first question about the West Ham manager's jibe .", + "length": 171 + }, + { + "text": "' Van Gaal snapped when first asked about Allardyce's comments, before he regained composure and reeled off his speech on why he thought the former Blackburn manager was wrong.", + "length": 176 + }, + { + "text": "So, I give you this and you can see that the blue ones are the good ones because long balls are also very difficult, which is why I began with the explanation of ball possession.", + "length": 178 + }, + { + "text": "'Because I expected this question, I have made an interpretation of the data for this game and then I have to say that it is not a good interpretation from Big Sam,' said Van Gaal.", + "length": 180 + }, + { + "text": "The statistics quoted in the A4 pamphlet, which were put together by Van Gaal's analysts, claim West Ham sent a higher percentage of their long passes forward than United during the match.", + "length": 188 + }, + { + "text": "'I give you this and you can see that the blue ones are the good ones because long balls are also very difficult, which is why I began with the explanation of 
ball possession,' the 63-year-old said.", + "length": 198 + }, + { + "text": "The United boss turned up for his press conference ahead of Wednesday's match at home to Burnley clutching four sheets of paper that he says rubbish claims Allardyce made after Sunday's draw at West Ham.", + "length": 203 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5806754529476166 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:36.6596254Z", + "first_section_created": "2025-12-23T09:35:36.659921913Z", + "last_section_published": "2025-12-23T09:35:36.660300728Z", + "all_results_received": "2025-12-23T09:35:36.758809121Z", + "output_generated": "2025-12-23T09:35:36.75903103Z", + "total_processing_time_ms": 99, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 98, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:36.659921913Z", + "publish_time": "2025-12-23T09:35:36.660113221Z", + "first_worker_start": "2025-12-23T09:35:36.660678844Z", + "last_worker_end": "2025-12-23T09:35:36.734997Z", + "total_journey_time_ms": 75, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:36.660697045Z", + "start_time": "2025-12-23T09:35:36.660753547Z", + "end_time": "2025-12-23T09:35:36.66082915Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:36.660902Z", + "start_time": "2025-12-23T09:35:36.661046Z", + "end_time": "2025-12-23T09:35:36.734997Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 73 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:36.660697845Z", + "start_time": "2025-12-23T09:35:36.660763948Z", + "end_time": "2025-12-23T09:35:36.660986157Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": 
"2025-12-23T09:35:36.660600641Z", + "start_time": "2025-12-23T09:35:36.660678844Z", + "end_time": "2025-12-23T09:35:36.660735747Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:36.660141622Z", + "publish_time": "2025-12-23T09:35:36.660300728Z", + "first_worker_start": "2025-12-23T09:35:36.660771348Z", + "last_worker_end": "2025-12-23T09:35:36.757907Z", + "total_journey_time_ms": 97, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:36.660748147Z", + "start_time": "2025-12-23T09:35:36.660790249Z", + "end_time": "2025-12-23T09:35:36.660866152Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:36.660998Z", + "start_time": "2025-12-23T09:35:36.661187Z", + "end_time": "2025-12-23T09:35:36.757907Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 96 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:36.660769448Z", + "start_time": "2025-12-23T09:35:36.66082815Z", + "end_time": "2025-12-23T09:35:36.66128877Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:36.660726446Z", + "start_time": "2025-12-23T09:35:36.660771348Z", + "end_time": "2025-12-23T09:35:36.66081545Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 169, + "min_processing_ms": 73, + "max_processing_ms": 96, + "avg_processing_ms": 84, + 
"median_processing_ms": 96, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4763, + "slowest_section_id": 1, + "slowest_section_time_ms": 97 + } +} diff --git a/data/output/00633afd20246d0416881cacc17621a2ded13509.json b/data/output/00633afd20246d0416881cacc17621a2ded13509.json new file mode 100644 index 0000000..a731dbd --- /dev/null +++ b/data/output/00633afd20246d0416881cacc17621a2ded13509.json @@ -0,0 +1,282 @@ +{ + "file_name": "00633afd20246d0416881cacc17621a2ded13509.txt", + "total_words": 707, + "top_n_words": [ + { + "word": "the", + "count": 26 + }, + { + "word": "of", + "count": 25 + }, + { + "word": "a", + "count": 22 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "police", + "count": 15 + }, + { + "word": "s", + "count": 11 + }, + { + "word": "he", + "count": 9 + }, + { + "word": "as", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "19:47 EST, 5 March 2013 .", + "length": 25 + }, + { + "text": "18:36 EST, 5 March 2013 .", + "length": 25 + }, + { + "text": "Stephen Wright and Arthur Martin .", + "length": 34 + }, + { + "text": "Sorry we are unable to accept comments for legal reasons.", + "length": 57 + }, + 
{ + "text": "He, too, is not suspected of receiving any payment from the media.", + "length": 66 + }, + { + "text": "Last night Padraig Reidy, of the campaign group Index on Censorship, said the latest arrest was ‘troubling’.", + "length": 112 + }, + { + "text": "Such a move would significantly weaken a journalist’s ability to resist police requests for confidential information on sources.", + "length": 130 + }, + { + "text": "‘Certainly this event taking place in a post-11 September world should have warranted the highest level of security,’ he said.", + "length": 130 + }, + { + "text": "Frank Armstrong, the former Assistant Commissioner of City of London Police, has been arrested over alleged leaks to a journalist .", + "length": 131 + }, + { + "text": "His report, which made 28 recommendations for improving the security of the Royal Family, was widely praised for not pulling punches.", + "length": 133 + }, + { + "text": "He added: ‘In the post-Leveson environment, it seems that more and more, informal contact between police officers and the Press is being outlawed.", + "length": 148 + }, + { + "text": "Rowell’s arrest coincided with Home Office plans to adopt many recommendations regarding contact between the Press and police set out in the Leveson Report.", + "length": 158 + }, + { + "text": "Such informal contact can have its own issues, but it is also crucially important for investigative reporting and scrutiny of the police and other authorities.", + "length": 159 + }, + { + "text": "Prince Charles and Prince William sample food and produce at the Anglesea Agricultural Food Fair, as part of the celebrations leading up to William's 21st birthday .", + "length": 165 + }, + { + "text": "Speaking in 2003, Armstrong said William’s party had been dealt with as a private event with low-key security, when in fact it should have been treated as a high-profile event.", + "length": 178 + }, + { + "text": "So far 108 people, including more than 70 journalists, have been 
arrested in relation to three linked inquiries launched by Scotland Yard in the wake of the phone-hacking scandal.", + "length": 179 + }, + { + "text": "As well as the Operation Weeting inquiry into phone-hacking and Operation Elveden, a third investigation, Operation Tuleta, is looking into computer hacking and other privacy breaches.", + "length": 184 + }, + { + "text": "His arrest comes just weeks after another respected senior officer, Met Chief Superintendent Andy Rowell, was arrested over alleged disclosures of confidential information to the media.", + "length": 185 + }, + { + "text": "A former deputy chief constable who wrote a scathing report about a breach of security at Prince William’s 21st birthday party was yesterday arrested over alleged leaks to a journalist.", + "length": 187 + }, + { + "text": "A larger-than-life character, with a keen interest in rugby and raising money for charity, Armstrong was popular with junior officers before he left City of London Police last September to work in corporate protection.", + "length": 218 + }, + { + "text": "Last night the police watchdog, whose investigators arrested  Armstrong on suspicion of misconduct in a public office, stressed that ‘at this stage’, no money is alleged to have changed hands between him and the reporter.", + "length": 226 + }, + { + "text": "’ The Independent Police Complaints Commission said its investigators had arrested a 52-year-old  on ‘suspicion of misconduct in public office in relation to the alleged passing of unauthorised information to a journalist’.", + "length": 230 + }, + { + "text": "Ten years ago, when 'Commander' of City of London Police, he rose to national prominence when he wrote a critical report about police failings which allowed an intruder to gatecrash Prince William's party at Windsor Castle in 2002 .", + "length": 232 + }, + { + "text": "Ten years ago, when ‘Commander’ of the same force, he rose to national prominence when he wrote a highly critical report 
about police failings which allowed an intruder to gatecrash Prince William’s party at Windsor Castle in 2002.", + "length": 237 + }, + { + "text": "Armstrong’s most recent job as Assistant Commissioner of City of London Police – equivalent in rank to a deputy chief constable – included responsibility for counter-terrorism and saw him oversee several highly sensitive fraud investigations.", + "length": 248 + }, + { + "text": "His arrest at his home in South-West London at 7am yesterday came just two months after he was awarded the Queen’s Police Medal for distinguished service and followed an illustrious career which included a spell as head of Tony Blair’s Special Branch protection team.", + "length": 271 + }, + { + "text": "Frank Armstrong, 52, until recently the £130,000-a-year second in charge of City of London Police, is the first chief officer to be arrested as a result of inquiries arising from Scotland Yard’s Operation Elveden – which was set up to investigate allegations of bribes paid to police officers and other public officials.", + "length": 325 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5554572939872742 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:37.161135734Z", + "first_section_created": "2025-12-23T09:35:37.161479749Z", + "last_section_published": "2025-12-23T09:35:37.161722859Z", + "all_results_received": "2025-12-23T09:35:37.229359068Z", + "output_generated": "2025-12-23T09:35:37.229538776Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:37.161479749Z", + "publish_time": "2025-12-23T09:35:37.161722859Z", + "first_worker_start": "2025-12-23T09:35:37.162177678Z", + "last_worker_end": "2025-12-23T09:35:37.228382Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": 
"sentencesort", + "receive_time": "2025-12-23T09:35:37.162099374Z", + "start_time": "2025-12-23T09:35:37.162177678Z", + "end_time": "2025-12-23T09:35:37.162262181Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:37.162394Z", + "start_time": "2025-12-23T09:35:37.162537Z", + "end_time": "2025-12-23T09:35:37.228382Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:37.162168377Z", + "start_time": "2025-12-23T09:35:37.16223308Z", + "end_time": "2025-12-23T09:35:37.162321784Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:37.162163577Z", + "start_time": "2025-12-23T09:35:37.162252481Z", + "end_time": "2025-12-23T09:35:37.162300883Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4431, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/0063979897e79e8b23020fb34c78708c480be81a.json b/data/output/0063979897e79e8b23020fb34c78708c480be81a.json new file mode 100644 index 0000000..665192e --- /dev/null +++ b/data/output/0063979897e79e8b23020fb34c78708c480be81a.json @@ -0,0 +1,334 @@ +{ + "file_name": "0063979897e79e8b23020fb34c78708c480be81a.txt", + "total_words": 651, + "top_n_words": [ + { + "word": "the", + "count": 42 + }, + { + "word": "you", + "count": 22 + }, + { + "word": "and", + "count": 21 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "is", + "count": 10 + }, + { + "word": "on", + "count": 10 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "f1", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Talal Musa .", + "length": 12 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "It’s due out September 21.", + "length": 28 + }, + { + "text": "Drivers, start your engines.", + "length": 28 + }, + { + "text": "11:02 EST, 2 September 2012 .", + "length": 29 + }, + { + "text": "10:53 EST, 2 September 2012 .", + "length": 29 + }, + { + "text": "'You can really attack the track.", + "length": 33 + }, + { + "text": "' The general look has improved, too.", + "length": 37 + }, + { + "text": "The daddy of all Formula 1 games is back.", + "length": 41 + }, + { + "text": "And, as an F1 ‘noob’, I’d have to agree.", + "length": 46 + }, + { + "text": "There have been the usual graphical upgrades, too.", + "length": 50 + }, + { + "text": "This is largely due to the ingenious Young Drivers Test.", + "length": 56 + }, + { + "text": "One massive improvement is the 
localised weather system.", + "length": 56 + }, + { + "text": "’ And for those after authenticity, then look no further.", + "length": 59 + }, + { + "text": "Formula fun: Yet again, the F1 game is shaping up to be a classic .", + "length": 67 + }, + { + "text": "It’s those extra elements that make you feel like a real F1 driver.", + "length": 69 + }, + { + "text": "Breathtaking: The attention to detail really is something to behold .", + "length": 69 + }, + { + "text": "Then there’s the Season Challenge, where you take part in ten races.", + "length": 70 + }, + { + "text": "This results in sudden weather changes in different parts of the track.", + "length": 71 + }, + { + "text": "Follow Talal on Twitter: @Mooseygamer and on Facebook: Daily Mail Games.", + "length": 72 + }, + { + "text": "The EGO engine has been tweaked so graphics seem sharper this time around.", + "length": 74 + }, + { + "text": "You start as a low-ranking team and select a rival driver you think you can beat.", + "length": 81 + }, + { + "text": "The attention to detail is fantastic, with dynamic lighting shining off the cars.", + "length": 81 + }, + { + "text": "Photorealistic: Squint your eyes and you'll feel that you're watching a race on TV .", + "length": 84 + }, + { + "text": "When you’re ready, you can test out your moves in a variety of different game modes.", + "length": 86 + }, + { + "text": "Yes, it’s F1 2012, which developers say is the most accessible and immersive F1 game out there.", + "length": 97 + }, + { + "text": "'The whole weather scenario system has really been refined this year,” said Paul Jeal, Game Director.", + "length": 103 + }, + { + "text": "'You get those moments where you’re not sure which tyres to be on and that really mixes up the racing.", + "length": 104 + }, + { + "text": "This even includes how to use the KERS (Kinetic Energy Recovery System) and the DRS (Drag Reduction System).", + "length": 108 + }, + { + "text": "Handling, however, does take some getting 
used to – no matter what difficulty you’re on, is hard to manage.", + "length": 111 + }, + { + "text": "Here, F1 novices are taught the basics of the sport through tutorials, and learn how to get the best from their cars.", + "length": 117 + }, + { + "text": "'You’ll get weather fronts come in now and then and it could be raining at one section of the track but dry on another.", + "length": 121 + }, + { + "text": "But it’s incredibly rewarding when you get it right, and there are useful speed / break markers which help keep you on course.", + "length": 128 + }, + { + "text": "Of course, if you’re still not up to the challenge, F1 2012 features all-new lap walk-throughs from F1 test driver Anthony Davidson.", + "length": 134 + }, + { + "text": "As you beat him over a series of races, you will win their seat in the better car – eventually working your way through the field to become champion.", + "length": 151 + }, + { + "text": "And now, thanks to loads of improvements, you’ll be able to feel the passion, speed and glory of the world’s most glamorous sport like never before.", + "length": 152 + }, + { + "text": "When one part of the track is wet while other areas are dry, the race will become more unpredictable and you’ll have to make game-changing strategy calls.", + "length": 156 + }, + { + "text": "'Significant work has gone into the physics model, particularly the suspension, which has a real change on the feel of the car,’ said Stephen Hood, Creative Director.", + "length": 168 + }, + { + "text": "All twelve teams and twenty-four drivers competing in the 2012 season will feature, and the brand-new, incomplete Circuit of the Americas in Austin, Texas — is included in the championship.", + "length": 191 + }, + { + "text": "Davidson will talk the player through a lap of each of the 20 circuits on the 2012 calendar, highlighting braking points, gearing and giving expert technical advice which will help players find extra speed on every lap.", + "length": 219 + }, + 
{ + "text": "These include Champions mode – where you’re thrown straight into the action against one of six world champions (Kimi Räikkönen, Lewis Hamilton, Jenson Button, Fernando Alonso, Sebastian Vettel and Michael Schumacher) to try and beat them and become the ultimate champion.", + "length": 277 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.6762766242027283 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:37.662513163Z", + "first_section_created": "2025-12-23T09:35:37.662913179Z", + "last_section_published": "2025-12-23T09:35:37.663222692Z", + "all_results_received": "2025-12-23T09:35:37.739011941Z", + "output_generated": "2025-12-23T09:35:37.73922885Z", + "total_processing_time_ms": 76, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 75, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:37.662913179Z", + "publish_time": "2025-12-23T09:35:37.663222692Z", + "first_worker_start": "2025-12-23T09:35:37.663598408Z", + "last_worker_end": "2025-12-23T09:35:37.738058Z", + "total_journey_time_ms": 75, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:37.663560406Z", + "start_time": "2025-12-23T09:35:37.663621809Z", + "end_time": "2025-12-23T09:35:37.663705012Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:37.663839Z", + "start_time": "2025-12-23T09:35:37.663984Z", + "end_time": "2025-12-23T09:35:37.738058Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 74 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:37.663535805Z", + "start_time": "2025-12-23T09:35:37.663598408Z", + "end_time": "2025-12-23T09:35:37.663690912Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": 
"2025-12-23T09:35:37.663546306Z", + "start_time": "2025-12-23T09:35:37.663617209Z", + "end_time": "2025-12-23T09:35:37.663665011Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 74, + "min_processing_ms": 74, + "max_processing_ms": 74, + "avg_processing_ms": 74, + "median_processing_ms": 74, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3735, + "slowest_section_id": 0, + "slowest_section_time_ms": 75 + } +} diff --git a/data/output/0063b38abe8bbfb635fc025c6ab2fcb990f73cc0.json b/data/output/0063b38abe8bbfb635fc025c6ab2fcb990f73cc0.json new file mode 100644 index 0000000..ca728c1 --- /dev/null +++ b/data/output/0063b38abe8bbfb635fc025c6ab2fcb990f73cc0.json @@ -0,0 +1,298 @@ +{ + "file_name": "0063b38abe8bbfb635fc025c6ab2fcb990f73cc0.txt", + "total_words": 443, + "top_n_words": [ + { + "word": "the", + "count": 18 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "have", + "count": 10 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "and", + 
"count": 7 + }, + { + "word": "are", + "count": 7 + }, + { + "word": "of", + "count": 7 + }, + { + "word": "we", + "count": 6 + }, + { + "word": "at", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "com.", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Scheirer.", + "length": 9 + }, + { + "text": "Craig Mackenzie .", + "length": 17 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "'It's a very sensitive issue.", + "length": 29 + }, + { + "text": "You are dealing with a cemetery.", + "length": 32 + }, + { + "text": "including 714 Civil War veterans.", + "length": 33 + }, + { + "text": "' She plans to stay with her mother.", + "length": 36 + }, + { + "text": "There is just a lot of uncertainty now.", + "length": 39 + }, + { + "text": "The cemetery holds about 20,000 graves, .", + "length": 41 + }, + { + "text": "View more videos at: http://nbcphiladelphia.", + "length": 44 + }, + { + "text": "Last updated at 12:53 PM on 30th December 2011 .", + "length": 48 + }, + { + "text": "You are laid to rest and now it is being disturbed.", + "length": 51 + }, + { + "text": "Among them is a Medal of Honor winner, Ignatz Gresser.", + "length": 54 + }, + { + "text": "Five properties have been declared structurally unsafe.", + "length": 55 + }, + { + "text": "With sinkholes, you never know how far they will spread.", + "length": 56 + }, + { + "text": "She said: 'We're afraid we'll lose our home and everything in it.", + "length": 65 + }, + { + "text": "' Everette Carr, president of the association which maintains the 157-year old .", + "length": 80 + }, + { + "text": "Many of the dead were buried in wooden baskets as was the custom during that era.", + "length": 81 + }, + { + "text": "Ann Blacker was forced to leave the home where she has lived for nearly three decades.", + "length": 86 + }, + { + "text": "' Emergency workers have cut off power supplies and are now filling in the hole in with concrete.", + 
"length": 97 + }, + { + "text": "Under threat: A red tape cordons off the graves which face being exhumed because of the sinkhole .", + "length": 98 + }, + { + "text": "Danger: Twenty-five residents had to be evacuated from their homes after water flooded a basement .", + "length": 99 + }, + { + "text": "A shelter has been set up at an elementary school to accommodate evacuees who need somewhere to stay.", + "length": 101 + }, + { + "text": "'At this point, we don't know if the homes will have to be condemned or not,' said fire chief Robert C.", + "length": 103 + }, + { + "text": "'Once we get the street secured, we will get into these homes and determine whether any have to be razed.", + "length": 105 + }, + { + "text": "About 60 graves in Union and West End Cemetery are threatened have been roped off after several headstones tilted.", + "length": 114 + }, + { + "text": "A dozen homes half a block from the hole on 10th Street were evacuated yesterday after firemen found a basement flooded.", + "length": 120 + }, + { + "text": "non-profit burial ground, revealed there were are no detailed historical records beyond those whose graves have headstones.", + "length": 123 + }, + { + "text": "Officials in Allentown, Pennsylvania, have been given the go-ahead by a judge to exhume remains buried during the Civil War.", + "length": 124 + }, + { + "text": "The hole, measuring 50ft long and 30ft wide, was thought to have collapsed when a water main burst and flooded under a road.", + "length": 125 + }, + { + "text": "Lehigh County Coroner Scott Grim said: 'If any sites are in jeopardy, than we are going to have to make that decision to excavate.", + "length": 130 + }, + { + "text": "Hole in the road: Workmen gather at the 50ft long sinkhole in Allentown which is believed to have opened up after a water main burst .", + "length": 134 + }, + { + "text": "A sinkhole that forced the evacuation of 25 residents from their homes has spread to an historic cemetery, threatening dozens 
of graves.", + "length": 136 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7916885018348694 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:38.164184303Z", + "first_section_created": "2025-12-23T09:35:38.164528118Z", + "last_section_published": "2025-12-23T09:35:38.164677124Z", + "all_results_received": "2025-12-23T09:35:38.233571986Z", + "output_generated": "2025-12-23T09:35:38.233745293Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:38.164528118Z", + "publish_time": "2025-12-23T09:35:38.164677124Z", + "first_worker_start": "2025-12-23T09:35:38.165287449Z", + "last_worker_end": "2025-12-23T09:35:38.232581Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:38.165295049Z", + "start_time": "2025-12-23T09:35:38.165353452Z", + "end_time": "2025-12-23T09:35:38.165408154Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:38.165528Z", + "start_time": "2025-12-23T09:35:38.165672Z", + "end_time": "2025-12-23T09:35:38.232581Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:38.165295149Z", + "start_time": "2025-12-23T09:35:38.165362452Z", + "end_time": "2025-12-23T09:35:38.165456656Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:38.165220046Z", + "start_time": "2025-12-23T09:35:38.165287449Z", + "end_time": "2025-12-23T09:35:38.165326851Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": 
"sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2526, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/0064099432e9859a02db11ad01babc0cf40f58a3.json b/data/output/0064099432e9859a02db11ad01babc0cf40f58a3.json new file mode 100644 index 0000000..76a6250 --- /dev/null +++ b/data/output/0064099432e9859a02db11ad01babc0cf40f58a3.json @@ -0,0 +1,516 @@ +{ + "file_name": "0064099432e9859a02db11ad01babc0cf40f58a3.txt", + "total_words": 1001, + "top_n_words": [ + { + "word": "the", + "count": 53 + }, + { + "word": "a", + "count": 29 + }, + { + "word": "and", + "count": 29 + }, + { + "word": "of", + "count": 24 + }, + { + "word": "it", + "count": 20 + }, + { + "word": "i", + "count": 18 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "on", + "count": 14 + }, + { + "word": "t", + "count": 12 + }, + { + "word": "video", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By 
.", + "length": 4 + }, + { + "text": "'I .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "Graham .", + "length": 8 + }, + { + "text": "Fashion .", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Anna Edwards .", + "length": 14 + }, + { + "text": "and the Walkman.", + "length": 16 + }, + { + "text": "Speak and Spell.", + "length": 16 + }, + { + "text": "PacMan, He-Man, tape .", + "length": 22 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "It's incredible really.", + "length": 23 + }, + { + "text": "12:53 EST, 27 March 2013 .", + "length": 26 + }, + { + "text": "09:00 EST, 27 March 2013 .", + "length": 26 + }, + { + "text": "The 39-year-old gardener .", + "length": 26 + }, + { + "text": "it, and it went a bit crazy.", + "length": 28 + }, + { + "text": "up a few lines to my own song.", + "length": 30 + }, + { + "text": "into my phone so I didn't forget.", + "length": 33 + }, + { + "text": "Fish's infamous weather forecast.", + "length": 33 + }, + { + "text": "'I put it on Facebook and thanks to .", + "length": 37 + }, + { + "text": "'They were doing a feature of a \"40 .", + "length": 37 + }, + { + "text": "The song is a celebration of his and .", + "length": 38 + }, + { + "text": "'It is a great tune and very nostalgic.", + "length": 39 + }, + { + "text": "and other technology for entertainment.", + "length": 39 + }, + { + "text": "'Not long after that I heard the Billy .", + "length": 40 + }, + { + "text": "'To be honest, I don't own an iPad and .", + "length": 40 + }, + { + "text": "gay, Shake n Vac, Crackajack, Who shot JR?", + "length": 42 + }, + { + "text": "Billy Joel song 'We didn't start the fire'.", + "length": 43 + }, + { + "text": "I can understand why it has become so popular.", + "length": 46 + }, + { + "text": "casette, Walkman, Buckaroo, Emu, Michael Knight's 
car.", + "length": 54 + }, + { + "text": "company which specialises in helping viral video owners.", + "length": 56 + }, + { + "text": "he wrote when he was a teenager and found in his parent's attic.", + "length": 64 + }, + { + "text": "including tasty Marathon bars before they turned into Snickers .", + "length": 64 + }, + { + "text": "references TV programmes such as Mr Ben, Swap Shop, Grange Hill, .", + "length": 66 + }, + { + "text": "6 million views on YouTube since it went online earlier this month.", + "length": 67 + }, + { + "text": "enjoyed going back to my old days and how it used to be when I was .", + "length": 68 + }, + { + "text": "The video wistfully remembers long-forgotten features of the 1980s...", + "length": 69 + }, + { + "text": "with iconic events including Charles and Diana's wedding and Michael .", + "length": 70 + }, + { + "text": "millions of other people's childhood memories and is a parody of the .", + "length": 70 + }, + { + "text": "trends moon boots, shell suits, and shoulder pads are mentioned along .", + "length": 71 + }, + { + "text": "I don't have a clue how they work, but now people are relying on them .", + "length": 71 + }, + { + "text": "creating the video and whenever I came up with an idea I would tap it .", + "length": 71 + }, + { + "text": "has changed the lyrics that now namecheck dozens of items relating to .", + "length": 71 + }, + { + "text": "all of my friends who started sharing it, more and more people watched .", + "length": 72 + }, + { + "text": "Blockbusters and Knight Rider and games like Tonka trucks, Subbuteo and .", + "length": 73 + }, + { + "text": "Joel track which I have always liked, and before I knew it was thinking .", + "length": 73 + }, + { + "text": "opening verse begins with 'Timmy Mallet, Wacaday, Colin from Eastenders .", + "length": 73 + }, + { + "text": "married father-of-two came up with the idea after hearing a feature on a .", + "length": 74 + }, + { + "text": "list\", a list of things the 
presenter wanted to do before he was 40 that .", + "length": 74 + }, + { + "text": "the 80s such as PacMan, Roland Rat, TizWaz, Connect Four, video casettes .", + "length": 74 + }, + { + "text": "Graham Wood's hit 'We didn't own an iPad' has already received more than 1.", + "length": 75 + }, + { + "text": "And his nostalgic lyrics about the 1980s have made his track a YouTube sensation.", + "length": 81 + }, + { + "text": "'It is being shared on websites such as Facebook as well so it is really taking off.", + "length": 84 + }, + { + "text": "'The song is perfect for record labels and there will be a few companies keen on it.", + "length": 84 + }, + { + "text": "radio show about a man who found a 'things to do before the age of 40' list from the 1980s.", + "length": 91 + }, + { + "text": "The video is a parody of the Billy Jole classic, and has already nocthed up millions of views .", + "length": 95 + }, + { + "text": "Turn back time: This is a far cry from the slim, stylish technology that most people crave now .", + "length": 96 + }, + { + "text": "The height of technology: This game had children everywhere going mad to get their hands on one .", + "length": 97 + }, + { + "text": "The video has struck a chord with its audience, who have reminisced about the age before mobile phones .", + "length": 104 + }, + { + "text": "Cartoons such as Top Cat were a firm favourite amongst the younger generation who adored the cheeky feline .", + "length": 108 + }, + { + "text": "This toy was on the wish list for many a child, who pinned their hopes on Star Wars games, rather than iPads .", + "length": 110 + }, + { + "text": "Graham, who goes by hunkygraham1 on YouTube, said: 'I was listening to the radio when I came up with the idea.", + "length": 110 + }, + { + "text": "As well as the YouTube views, millions of social media users have also linked to the song, which has gone viral.", + "length": 112 + }, + { + "text": "'I spent hours poring over the images on Google but 
making the song and the video itself didn't take long at all.", + "length": 113 + }, + { + "text": "The gardener sings about the joy of using Teletext, rather than constantly tapping out text messages on a smart phone .", + "length": 119 + }, + { + "text": "This was the dreaded sight that many youngsters faced when they turned their televisions on before the programmes began .", + "length": 121 + }, + { + "text": "Playstations and X-Boxes were a long way off in the 1980s, when youngsters had to be content with board games like Connect 4 .", + "length": 126 + }, + { + "text": "They don't make them like that anymore: Paper £1 notes, Teletext and Top Cat area ll things of the past, the singer laments .", + "length": 126 + }, + { + "text": "Graham, from Cheshunt, Hertfordshire, provides the vocals and he got his musical friend Andrew Barker to play his ukelele for the backing track.", + "length": 144 + }, + { + "text": "In an age where it seems every man, woman and child has an iPad, one man has struck a chord by singing about the simpler times of his childhood.", + "length": 144 + }, + { + "text": "Graham Wood's (right) nostalgic hit 'We didn't own an iPad', which he produced with friend Andrew Barker (left) has already received more than 1.", + "length": 145 + }, + { + "text": "Damian Collier, founder of Viral Spiral, said: 'Once you pass one million hits that is a sign of considerable success from a viral video perspective.", + "length": 149 + }, + { + "text": "The video flashes up images that anybody alive in the 1980s will fondly remember - from a lack of mobile phones to Shep, the beloved Blue Peter dog .", + "length": 149 + }, + { + "text": "The video recalls events that shocked the world - including John Lennon's death - but keeps it lighthearted with references to children's book characters .", + "length": 155 + }, + { + "text": "' It then goes into the chorus of 'We didn't own an iPad, never heard of texting only Teletexting, we didn't own an iPad, never 
heard of SatNavs only used AA maps'.", + "length": 164 + }, + { + "text": "6 million views on YouTube and is a parody of the Billy Joel song 'We didn't start the fire' 'When I uploaded it to YouTube I didn't expect it to get so many views, I was hoping it might get 1,000 or so.", + "length": 203 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5673854947090149 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:38.665420826Z", + "first_section_created": "2025-12-23T09:35:38.665798441Z", + "last_section_published": "2025-12-23T09:35:38.666175457Z", + "all_results_received": "2025-12-23T09:35:38.758883308Z", + "output_generated": "2025-12-23T09:35:38.759221422Z", + "total_processing_time_ms": 93, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 92, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:38.665798441Z", + "publish_time": "2025-12-23T09:35:38.66601325Z", + "first_worker_start": "2025-12-23T09:35:38.666550973Z", + "last_worker_end": "2025-12-23T09:35:38.757976Z", + "total_journey_time_ms": 92, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:38.666576074Z", + "start_time": "2025-12-23T09:35:38.666645077Z", + "end_time": "2025-12-23T09:35:38.66673858Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:38.666858Z", + "start_time": "2025-12-23T09:35:38.667008Z", + "end_time": "2025-12-23T09:35:38.757976Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 90 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:38.666520771Z", + "start_time": "2025-12-23T09:35:38.666608475Z", + "end_time": "2025-12-23T09:35:38.66673228Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": 
"2025-12-23T09:35:38.66647707Z", + "start_time": "2025-12-23T09:35:38.666550973Z", + "end_time": "2025-12-23T09:35:38.666591574Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:38.666096154Z", + "publish_time": "2025-12-23T09:35:38.666175457Z", + "first_worker_start": "2025-12-23T09:35:38.666578974Z", + "last_worker_end": "2025-12-23T09:35:38.736481Z", + "total_journey_time_ms": 70, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:38.666637676Z", + "start_time": "2025-12-23T09:35:38.666670778Z", + "end_time": "2025-12-23T09:35:38.666684678Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:38.666877Z", + "start_time": "2025-12-23T09:35:38.666993Z", + "end_time": "2025-12-23T09:35:38.736481Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 69 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:38.666589374Z", + "start_time": "2025-12-23T09:35:38.666618776Z", + "end_time": "2025-12-23T09:35:38.666637276Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:38.666548073Z", + "start_time": "2025-12-23T09:35:38.666578974Z", + "end_time": "2025-12-23T09:35:38.666584974Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 159, + "min_processing_ms": 69, + "max_processing_ms": 90, + "avg_processing_ms": 79, + 
"median_processing_ms": 90, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2731, + "slowest_section_id": 0, + "slowest_section_time_ms": 92 + } +} diff --git a/data/output/0064289ce8b7e0ce50e8a910bbf977864c7cc958.json b/data/output/0064289ce8b7e0ce50e8a910bbf977864c7cc958.json new file mode 100644 index 0000000..f90b40d --- /dev/null +++ b/data/output/0064289ce8b7e0ce50e8a910bbf977864c7cc958.json @@ -0,0 +1,358 @@ +{ + "file_name": "0064289ce8b7e0ce50e8a910bbf977864c7cc958.txt", + "total_words": 480, + "top_n_words": [ + { + "word": "the", + "count": 27 + }, + { + "word": "his", + "count": 18 + }, + { + "word": "of", + "count": 17 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "was", + "count": 10 + }, + { + "word": "he", + "count": 9 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "prince", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "occurred.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "attachment.", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "“Why James?", + "length": 13 + }, + { + "text": "Rebecca English .", + "length": 17 + }, + { + "text": "held on March 26.", + "length": 17 + }, + { + "text": "only one for the job”.", 
+ "length": 24 + }, + { + "text": "A military funeral was .", + "length": 24 + }, + { + "text": "11:29 EST, 10 April 2012 .", + "length": 26 + }, + { + "text": "19:53 EST, 10 April 2012 .", + "length": 26 + }, + { + "text": "A spokesman for the Prince said .", + "length": 33 + }, + { + "text": "explosion in Afghanistan in  2007.", + "length": 35 + }, + { + "text": "The married father of two, of 1st .", + "length": 35 + }, + { + "text": "WO2 Hassell from Chipping Campden, .", + "length": 36 + }, + { + "text": "William also lost his much admired .", + "length": 36 + }, + { + "text": "mission orders perfectly from memory.", + "length": 37 + }, + { + "text": "to disrupt his tutoring of the prince.", + "length": 38 + }, + { + "text": "Nicknamed ‘Safe Hands’ due to his .", + "length": 39 + }, + { + "text": "‘At the funeral I asked his captain .", + "length": 39 + }, + { + "text": "His close friend and fellow pilot Matt .", + "length": 40 + }, + { + "text": "He said: ‘He had carefully prepared a .", + "length": 41 + }, + { + "text": "‘His goalkeeping skills earned him the .", + "length": 42 + }, + { + "text": "career as he was a consummate professional.", + "length": 43 + }, + { + "text": "his thoughts are with Mr Hassell’s family.", + "length": 44 + }, + { + "text": "collision with a truck in Faringdon, Oxfordshire.", + "length": 49 + }, + { + "text": "teaching the future King we were shaking in our boots.", + "length": 54 + }, + { + "text": "” and he said, “We couldn’t afford a cock-up and he was the .", + "length": 67 + }, + { + "text": "goalkeeping skills, he taught William about the tactics and skills .", + "length": 68 + }, + { + "text": "required by military pilots when he joined the RAF on a four-month .", + "length": 68 + }, + { + "text": "Taddington, Gloucestershire, added: ‘When we found out our lad was .", + "length": 70 + }, + { + "text": "Webb, 36, revealed that he managed to keep his cool when jokers tried .", + "length": 71 + }, + { + "text": 
"Gloucestershire, died last month when his motorbike was involved in a .", + "length": 71 + }, + { + "text": "Army mentor from Sandhurst, Major Alexis Roberts, 32,  who died in an .", + "length": 72 + }, + { + "text": "nickname ‘Safe Hands’ – a name which followed him into his flying .", + "length": 73 + }, + { + "text": "Battalion The Royal Gurkha Rifles, was in charge of a 30-vehicle convoy .", + "length": 73 + }, + { + "text": "his computer as a practical joke – but he still managed to deliver the .", + "length": 74 + }, + { + "text": "day’s presentation for the prince when one of his friends wiped it from .", + "length": 75 + }, + { + "text": "yesterday: ‘The Duke of Cambridge has been informed of the sad news and .", + "length": 75 + }, + { + "text": "negotiating a stretch of road nicknamed ‘IED alley’ when the fatal blast .", + "length": 78 + }, + { + "text": "’ Other friends and family paid tribute to WO2 Hassell, who had served in Iraq and Afghanistan.", + "length": 97 + }, + { + "text": "Skilled: Pilot James Hassell, 36, was trusted to teach the future King of England Prince William how to fly .", + "length": 109 + }, + { + "text": "Prince William is mourning the loss of his former flying instructor who has been killed in a motorbike crash.", + "length": 109 + }, + { + "text": "’ At the time of the accident WO2 Hassell and his wife of 12 years, Lily, were about to buy their first home together.", + "length": 120 + }, + { + "text": "The respected Army Air Corps Lynx helicopter pilot was selected to teach the prince on a fast-track flying course four years ago.", + "length": 129 + }, + { + "text": "Mourning: Prince William, seen during his flight training, was saddened by news of the death of his former instructor James Hassell .", + "length": 133 + }, + { + "text": "’ Tragic: The death of Major Alexis Roberts, left, who was killed instantly, left Prince William, right, 'deeply saddened' His stepfather, Pat Salter, from .", + "length": 159 + }, + { + "text": 
"The Duke of Cambridge yesterday extended his ‘deepest sympathy’ to the grieving family of James Hassell, 36, a Warrant Officer Class 2 with the elite Wild Cat Fielding Team, which is based at RNAS Yeovilton in Somerset.", + "length": 223 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.729804277420044 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:39.166934863Z", + "first_section_created": "2025-12-23T09:35:39.167239676Z", + "last_section_published": "2025-12-23T09:35:39.167446985Z", + "all_results_received": "2025-12-23T09:35:39.243193511Z", + "output_generated": "2025-12-23T09:35:39.243362318Z", + "total_processing_time_ms": 76, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 75, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:39.167239676Z", + "publish_time": "2025-12-23T09:35:39.167446985Z", + "first_worker_start": "2025-12-23T09:35:39.167866202Z", + "last_worker_end": "2025-12-23T09:35:39.239026Z", + "total_journey_time_ms": 71, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:39.167845201Z", + "start_time": "2025-12-23T09:35:39.167913504Z", + "end_time": "2025-12-23T09:35:39.168006908Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:39.168094Z", + "start_time": "2025-12-23T09:35:39.168224Z", + "end_time": "2025-12-23T09:35:39.239026Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 70 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:39.1678144Z", + "start_time": "2025-12-23T09:35:39.167866202Z", + "end_time": "2025-12-23T09:35:39.167930204Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:39.167863002Z", + "start_time": 
"2025-12-23T09:35:39.167991007Z", + "end_time": "2025-12-23T09:35:39.168018508Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 70, + "min_processing_ms": 70, + "max_processing_ms": 70, + "avg_processing_ms": 70, + "median_processing_ms": 70, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2896, + "slowest_section_id": 0, + "slowest_section_time_ms": 71 + } +} diff --git a/data/output/006452a516381c34852f96e319402c91c12640a5.json b/data/output/006452a516381c34852f96e319402c91c12640a5.json new file mode 100644 index 0000000..3fdc6c0 --- /dev/null +++ b/data/output/006452a516381c34852f96e319402c91c12640a5.json @@ -0,0 +1,452 @@ +{ + "file_name": "006452a516381c34852f96e319402c91c12640a5.txt", + "total_words": 1019, + "top_n_words": [ + { + "word": "the", + "count": 39 + }, + { + "word": "i", + "count": 33 + }, + { + "word": "to", + "count": 30 + }, + { + "word": "she", + "count": 22 + }, + { + "word": "a", + "count": 21 + }, + { + "word": "up", + "count": 17 + }, + { + "word": "was", + "count": 17 + 
}, + { + "word": "her", + "count": 15 + }, + { + "word": "make", + "count": 15 + }, + { + "word": "of", + "count": 15 + } + ], + "sorted_sentences": [ + { + "text": "co.", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "'I .", + "length": 4 + }, + { + "text": "She .", + "length": 5 + }, + { + "text": "' Despite .", + "length": 11 + }, + { + "text": "' Although .", + "length": 12 + }, + { + "text": "'I decided .", + "length": 12 + }, + { + "text": "Julian Robinson .", + "length": 17 + }, + { + "text": "According to patient.", + "length": 21 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "\" 'We’ve been together for six years now.", + "length": 43 + }, + { + "text": "birthmark would be a lot redder than it is now.", + "length": 47 + }, + { + "text": "has been a target for bullies throughout her life.", + "length": 50 + }, + { + "text": "She is pPictured right as a child with her father .", + "length": 51 + }, + { + "text": "raised more than £8million for Cancer Research UK.", + "length": 51 + }, + { + "text": "that it was no longer necessary, hating the procedure.", + "length": 54 + }, + { + "text": "She had laser surgery from her birth until she was ten.", + "length": 55 + }, + { + "text": "like,\" but he simply replied with \"it makes no difference to me.", + "length": 64 + }, + { + "text": "'No-one could say anything negative that I haven’t heard before.", + "length": 66 + }, + { + "text": "less effective as I got older, but I know that without it that my .", + "length": 67 + }, + { + "text": "my cosmetic make-up I’m able to conceal my birthmark well anyway.", + "length": 67 + }, + { + "text": "to meet him with no make-up on, I said \"this is what I really look .", + "length": 68 + }, + { + "text": "the positive response she has had to her no make-up selfie online, she .", + "length": 72 + }, + { + "text": "Amy Elsegood had her own reasons for posting a no make-up picture online.", + "length": 73 + }, + { 
+ "text": "would probably think about having it again now that I’m older but with .", + "length": 74 + }, + { + "text": "added: 'Laser treatment tends to work best when you’re young, making it .", + "length": 75 + }, + { + "text": "'But then I met Josh at 16, he was the same age but from a different school.", + "length": 76 + }, + { + "text": "she had laser surgery from birth until the age of 10, Miss Elsegood decided .", + "length": 77 + }, + { + "text": "They vary in size and can if left untreated tend to get darker over the years.", + "length": 78 + }, + { + "text": "'Other kids would point at me, call me ‘blob face’ and say that I was ugly.", + "length": 79 + }, + { + "text": "Pictured right, she used to use special make up to conceal her port wine stain .", + "length": 80 + }, + { + "text": "Miss Elsegood, said: 'I was about four years old when I realised I was different.", + "length": 81 + }, + { + "text": "They are caused by abnormal blood vessels and are most commonly present from birth.", + "length": 83 + }, + { + "text": "' When she became a teenager she thought she would find it difficult to find a boyfriend.", + "length": 89 + }, + { + "text": "The 22-year-old used to be 'too self-conscious' to go out without hiding her birthmark first.", + "length": 93 + }, + { + "text": "'I’ve learnt that there are much worse things to have in life and I’ve accepted me for me.", + "length": 94 + }, + { + "text": "uk, about three in 1,000 babies are born with a port-wine stain - but they are not hereditary.", + "length": 94 + }, + { + "text": "' Cheryl Cole joined the make-up free selfie trend by posting a picture of herself on Instagram .", + "length": 97 + }, + { + "text": "'My birthmark doesn’t need to define me and no-one should be ashamed to show who they really are.", + "length": 99 + }, + { + "text": "Having put the picture on Facebook, she is receiving messages of support from people around the world.", + "length": 102 + }, + { + "text": "Port-wine stains are 
red or purple marks that often appear on the face but which can affect any area of skin.", + "length": 109 + }, + { + "text": "She said: 'It wasn’t until the cancer research no make-up selfie came about that I really accepted who I was.", + "length": 111 + }, + { + "text": "She added: 'I was so embarrassed by my port wine stain that I never thought anyone would accept me for who I was.", + "length": 113 + }, + { + "text": "A brave young woman has posted a no make-up 'selfie' online - to show the true extent of the birthmark on her face.", + "length": 115 + }, + { + "text": "'I even had one woman come up to me on holiday to say that I would’ve been pretty if it wasn’t for my birthmark.", + "length": 116 + }, + { + "text": "A number of treatments have been tried including using laser and other light sources to reduce the redness of stains.", + "length": 117 + }, + { + "text": "'The reaction on Facebook was amazing, I had over 600 likes with people all over the world messaging me, it was surreal.", + "length": 120 + }, + { + "text": "Amy Elsegood received support form around the world after she bravely posted a selfie of her birthmark, left,  on Facebook.", + "length": 124 + }, + { + "text": "Amy Elsegood, from Ripon, North Yorkshire, was determined to show people who she really was, posting the image on the internet.", + "length": 127 + }, + { + "text": "' However, Miss Elsegood still felt self-conscious when leaving the house and did not go anywhere without wearing her cover-up.", + "length": 127 + }, + { + "text": "'But then one day I just thought, \"you were born like this, there is no reason to be ashamed,\" so I posted a no make-up selfie.", + "length": 127 + }, + { + "text": "Michelle Heaton, Holly Willoughby and Kym Marsh also ditched make-up to show their natural beauty in their own bare-faced pictures.", + "length": 131 + }, + { + "text": "Despite the positive response Amy Elsegood has had to her no make-up selfie, she has been the target of bullies 
throughout her life .", + "length": 133 + }, + { + "text": "'Since posting my selfie online I have gained so much confidence, it’s been great helping others who are in a similar position too.", + "length": 133 + }, + { + "text": "The pretty 22-year-old has worn special make-up from the NHS since she was 11, helping her conceal the port wine stain on her right cheek.", + "length": 138 + }, + { + "text": "Miss Elsegood said: 'I wore it [the make-up] every time I left the house as I was too self-conscious to go out without hiding my birthmark first.", + "length": 145 + }, + { + "text": "When Miss Elsegood, right, became a teenager she thought she would find it difficult to find a boyfriend - but then she met Josh, pictured left .", + "length": 145 + }, + { + "text": "But the craze of taking bare-faced selfies and posting them on the internet has swept the online world this year - and raised millions for charity.", + "length": 147 + }, + { + "text": "'The first time I met him I was wearing make-up to conceal what was underneath but after a few weeks into our relationship I wanted to show him the real me.", + "length": 156 + }, + { + "text": "Since posting her selfie online Miss Elsegood, pictured on holiday says she has gained confidence - and hopes the initiative will inspire others in a similar position .", + "length": 168 + }, + { + "text": "The idea took off in March after huge numbers of women posted pictures of themselves wearing no make-up on social media websites - then nominated their friends to do the same.", + "length": 175 + }, + { + "text": "Among those to take part were a host of celebrities, including Prince Harry's ex-girlfriend Cressida Bonas, X-Factor judge Cheryl Cole, actresses Michelle Keegan and Helen Flanagan.", + "length": 181 + }, + { + "text": "At the time, Cancer Research UK said the donations would allow the charity to carry out 10 clinical trials that would not previously have been possible because of funding restrictions.", + "length": 184 
+ } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.43405577540397644 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:39.668267858Z", + "first_section_created": "2025-12-23T09:35:39.668722377Z", + "last_section_published": "2025-12-23T09:35:39.669220197Z", + "all_results_received": "2025-12-23T09:35:39.78822761Z", + "output_generated": "2025-12-23T09:35:39.788411418Z", + "total_processing_time_ms": 120, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 119, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:39.668722377Z", + "publish_time": "2025-12-23T09:35:39.669021089Z", + "first_worker_start": "2025-12-23T09:35:39.669619814Z", + "last_worker_end": "2025-12-23T09:35:39.787506Z", + "total_journey_time_ms": 118, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:39.669600213Z", + "start_time": "2025-12-23T09:35:39.669677716Z", + "end_time": "2025-12-23T09:35:39.669787021Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:39.669883Z", + "start_time": "2025-12-23T09:35:39.670028Z", + "end_time": "2025-12-23T09:35:39.787506Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 117 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:39.669555911Z", + "start_time": "2025-12-23T09:35:39.669619814Z", + "end_time": "2025-12-23T09:35:39.669719818Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:39.669561411Z", + "start_time": "2025-12-23T09:35:39.669629614Z", + "end_time": "2025-12-23T09:35:39.669709418Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:39.669104293Z", + "publish_time": 
"2025-12-23T09:35:39.669220197Z", + "first_worker_start": "2025-12-23T09:35:39.669619714Z", + "last_worker_end": "2025-12-23T09:35:39.782994Z", + "total_journey_time_ms": 113, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:39.669670516Z", + "start_time": "2025-12-23T09:35:39.669795821Z", + "end_time": "2025-12-23T09:35:39.669807522Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:39.66999Z", + "start_time": "2025-12-23T09:35:39.670128Z", + "end_time": "2025-12-23T09:35:39.782994Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 112 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:39.669630714Z", + "start_time": "2025-12-23T09:35:39.669669616Z", + "end_time": "2025-12-23T09:35:39.669692717Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:39.669573712Z", + "start_time": "2025-12-23T09:35:39.669619714Z", + "end_time": "2025-12-23T09:35:39.669627714Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 229, + "min_processing_ms": 112, + "max_processing_ms": 117, + "avg_processing_ms": 114, + "median_processing_ms": 117, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 
0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2772, + "slowest_section_id": 0, + "slowest_section_time_ms": 118 + } +} diff --git a/data/output/006475964d10b2163a5bcfbbadc9bacda55fb952.json b/data/output/006475964d10b2163a5bcfbbadc9bacda55fb952.json new file mode 100644 index 0000000..82df6f5 --- /dev/null +++ b/data/output/006475964d10b2163a5bcfbbadc9bacda55fb952.json @@ -0,0 +1,266 @@ +{ + "file_name": "006475964d10b2163a5bcfbbadc9bacda55fb952.txt", + "total_words": 317, + "top_n_words": [ + { + "word": "his", + "count": 11 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "to", + "count": 11 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "the", + "count": 10 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "as", + "count": 7 + }, + { + "word": "dying", + "count": 6 + }, + { + "word": "he", + "count": 6 + }, + { + "word": "i", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "his life.", + "length": 9 + }, + { + "text": "settlement.", + "length": 11 + }, + { + "text": "Friday in San Diego County.", + "length": 27 + }, + { + "text": "For Tim, the potential of .", + "length": 27 + }, + { + "text": "kill Meggan Lambesis last year.", + "length": 31 + }, + { + "text": "The singer said she had restricted his .", + "length": 40 + }, + { + "text": "if it's in a form restored to its roots.", + "length": 40 + }, + { + "text": "He had pleaded guilty to soliciting murder.", + "length": 43 + }, + { + "text": "important things in his personal life first.", + "length": 44 + }, + { + "text": "As I Lay Dying was formed in San Diego in 2000.", + "length": 47 + }, + { + "text": "The metal band was nominated for a Grammy in 2008.", + "length": 50 + 
}, + { + "text": "'He believes As I Lay Dying should only pick back up .", + "length": 54 + }, + { + "text": "U-T San Diego reports 33-year-old Timothy Lambesis was sentenced .", + "length": 66 + }, + { + "text": "that his wife would get a large share of his income in a divorce .", + "length": 66 + }, + { + "text": "pursuing As I Lay Dying again means working on a lot of other more .", + "length": 68 + }, + { + "text": "Authorities say Lambesis hired an undercover sheriff's detective to .", + "length": 69 + }, + { + "text": "visits with their children after a separation, and he also was angry .", + "length": 70 + }, + { + "text": "Meggan has also filed a $2 million civil suit against him, WTVA reports.", + "length": 72 + }, + { + "text": "period of time where he acted contrary to the person he's been most of .", + "length": 72 + }, + { + "text": "'Tim has acknowledged his guilt and clearly made some bad decisions in a .", + "length": 74 + }, + { + "text": "'As I Lay Dying is sleeping rather than dead,' wrote bandmate Jordan Mancino.", + "length": 77 + }, + { + "text": "Lambesis's bandmates have announced a hiatus as he deals with his legal troubles .", + "length": 82 + }, + { + "text": "The rocker pled guilty in February to soliciting an undercover officer to murder her.", + "length": 85 + }, + { + "text": "His attorney said Lambesis acted out of character and the behavior was sparked by steroid use.", + "length": 94 + }, + { + "text": "The lead singer of the band As I Lay Dying has been sentenced to six years in prison for plotting to kill his wife.", + "length": 115 + }, + { + "text": "The rest of his band is on hiatus, and has recorded new material with another singer, Shane Bay, to be released under a different name.", + "length": 135 + }, + { + "text": "Tim Lambesis, lead singer for the heavy metal band As I Lay Dying, has been sentenced to six years in prison for plotting to kill his wife .", + "length": 140 + } + ], + "sentiment": { + "sentiment": 
"negative", + "score": 0.7306225299835205 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:40.169995069Z", + "first_section_created": "2025-12-23T09:35:40.170312182Z", + "last_section_published": "2025-12-23T09:35:40.170475589Z", + "all_results_received": "2025-12-23T09:35:40.2385748Z", + "output_generated": "2025-12-23T09:35:40.238733107Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:40.170312182Z", + "publish_time": "2025-12-23T09:35:40.170475589Z", + "first_worker_start": "2025-12-23T09:35:40.17099661Z", + "last_worker_end": "2025-12-23T09:35:40.237629Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:40.170945108Z", + "start_time": "2025-12-23T09:35:40.17099661Z", + "end_time": "2025-12-23T09:35:40.171035212Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:40.171189Z", + "start_time": "2025-12-23T09:35:40.171331Z", + "end_time": "2025-12-23T09:35:40.237629Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:40.170969409Z", + "start_time": "2025-12-23T09:35:40.171030012Z", + "end_time": "2025-12-23T09:35:40.171065613Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:40.170941008Z", + "start_time": "2025-12-23T09:35:40.17099831Z", + "end_time": "2025-12-23T09:35:40.171018711Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1735, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/0064944e367697b5d7aa20aed67be30a7c724dee.json b/data/output/0064944e367697b5d7aa20aed67be30a7c724dee.json new file mode 100644 index 0000000..1c26c32 --- /dev/null +++ b/data/output/0064944e367697b5d7aa20aed67be30a7c724dee.json @@ -0,0 +1,314 @@ +{ + "file_name": "0064944e367697b5d7aa20aed67be30a7c724dee.txt", + "total_words": 745, + "top_n_words": [ + { + "word": "the", + "count": 52 + }, + { + "word": "s", + "count": 22 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "winston", + "count": 13 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "as", + "count": 12 + }, + { + "word": "was", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + 
}, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "David Wilkes .", + "length": 14 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "05:45 EST, 18 April 2013 .", + "length": 26 + }, + { + "text": "17:52 EST, 17 April 2013 .", + "length": 26 + }, + { + "text": "The other shows Baroness Thatcher’s funeral cortege yesterday.", + "length": 64 + }, + { + "text": "Baroness Thatcher's coffin leaves St Paul's cathedral yesterday .", + "length": 65 + }, + { + "text": "One shows the funeral cortege of Sir Winston Churchill on January 30, 1965.", + "length": 75 + }, + { + "text": "You cannot help but be struck by the remarkable similarities between the two scenes.", + "length": 84 + }, + { + "text": "A total of 321,360 people filed past the catafalque while Sir Winston was lying in state.", + "length": 89 + }, + { + "text": "’ Silent crowds watched as Churchill’s coffin left Westminster Hall as Big Ben struck 9.", + "length": 92 + }, + { + "text": "Sombre: The Queen at Churchill's funeral, left, and striking a similar pose yesterday, right .", + "length": 94 + }, + { + "text": "Another notable change in the scene today is the removal of the railway bridge over Ludgate Hill.", + "length": 97 + }, + { + "text": "Sir Winston Churchill's coffin leaves St Paul's cathedral following his funeral service in 1965 .", + "length": 97 + }, + { + "text": "Two great leaders: Winston Churchill and Margaret Thatcher in their prime as British Prime Ministers .", + "length": 102 + }, + { + "text": "The Queen and the Duke Edinburgh attended, as did the Queen Mother and the then teenage Prince Charles.", + "length": 103 + }, + { + "text": "Sir Winston’s was a full state funeral, whereas Baroness Thatcher’s yesterday was a ceremonial occasion.", + "length": 108 + }, + { + "text": "Towering between it and St Paul’s now is also the Leadenhall Building, which will be 737ft when completed.", + "length": 108 + }, + { + "text": "The echoes of Winston 
Churchill's funeral were felt throughout the day as Baroness Thatcher was laid to rest .", + "length": 110 + }, + { + "text": "But, apart from a few radical additions to the City of London’s skyline, it is almost as if time has stood still.", + "length": 115 + }, + { + "text": "It was only 38 years later, in 2003, that the 591ft Gherkin was completed on the former site of the Baltic Exchange.", + "length": 116 + }, + { + "text": "45am and started its journey through London, the skyline then dominated by the dome of St Paul’s and free from skyscrapers.", + "length": 125 + }, + { + "text": "Yesterday, soldiers and public lined the same historic street to pay their respects to Baroness Thatcher as she was laid to rest .", + "length": 130 + }, + { + "text": "A tale of two leaders: Crowds line Fleet Street as Sir Winston Churchill's coffin is led to St Paul's Cathedral on January 30, 1965.", + "length": 132 + }, + { + "text": "Dark and quiet as the night-time Thames itself, it flows through Westminster Hall, eddying about the foot of the rock called Churchill.", + "length": 135 + }, + { + "text": "Both events were photographed from the same position, looking down as the processions passed along Fleet Street on the way to St Paul’s.", + "length": 138 + }, + { + "text": "Together with the then Prime Minister, Harold Wilson, and representatives of 112 countries they packed into the cathedral for the service.", + "length": 138 + }, + { + "text": "Some 48 years, two months, 16 days and – if you look very closely at the clocks – just a few minutes separate these images of the two solemn events.", + "length": 152 + }, + { + "text": "At Sir Winston’s funeral the mourners were led by his wife, Lady Clementine Churchill, his son Randolph and daughters Mary Soames and Lady Sarah Audley.", + "length": 154 + }, + { + "text": "Standing tall: Winston Churchill's gun carriage goes past the Houses of Parliament in 1965, left, and, right, Baroness Thatcher's hearse takes the same route 
yesterday .", + "length": 169 + }, + { + "text": "By decree of the Queen, Sir Winston Churchill's body lay in State in Westminster Hall for three days and a state funeral service was held at St Paul's Cathedral in 1965 .", + "length": 170 + }, + { + "text": "It was in a piece about the lying-in-state of Sir Winston that the Daily Mail’s Vincent Mulchrone famously wrote: ‘Two rivers run silently through London tonight, and one is made of people.", + "length": 193 + }, + { + "text": "On both occasions, crowds lined the streets in their masses to pay their last respects as the Union Jack-draped coffins passed by on gun carriages and the military featured heavily with similar pomp.", + "length": 199 + }, + { + "text": "Whereas Baroness Thatcher was cremated after her funeral service, Sir Winston was laid to rest in the Oxfordshire parish churchyard of Bladon, close to Blenheim Palace where he was born 90 years before, with only family members present.", + "length": 236 + }, + { + "text": "The Queen struck an almost identical pose to the one she adopted yesterday, with her handbag hooked over her left arm and her hands clutched in front of her, as she contemplated the loss of Sir Winston, the first of the 12 Prime Ministers to have served during her reign.", + "length": 271 + }, + { + "text": "But in practice there are few real differences between the two, except that a state funeral has to be approved by a vote in Parliament and with a state funeral the deceased would normally be expected to lie in state for three days in Westminster Hall to allow members of the public to pay their respects.", + "length": 304 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4526100754737854 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:40.671255961Z", + "first_section_created": "2025-12-23T09:35:40.67293423Z", + "last_section_published": "2025-12-23T09:35:40.673226042Z", + "all_results_received": 
"2025-12-23T09:35:40.741864575Z", + "output_generated": "2025-12-23T09:35:40.742041283Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:40.67293423Z", + "publish_time": "2025-12-23T09:35:40.673226042Z", + "first_worker_start": "2025-12-23T09:35:40.673557756Z", + "last_worker_end": "2025-12-23T09:35:40.740951Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:40.673525755Z", + "start_time": "2025-12-23T09:35:40.673588557Z", + "end_time": "2025-12-23T09:35:40.67366486Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:40.673795Z", + "start_time": "2025-12-23T09:35:40.673935Z", + "end_time": "2025-12-23T09:35:40.740951Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:40.673551756Z", + "start_time": "2025-12-23T09:35:40.673615158Z", + "end_time": "2025-12-23T09:35:40.673720463Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:40.673493653Z", + "start_time": "2025-12-23T09:35:40.673557756Z", + "end_time": "2025-12-23T09:35:40.673595657Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, 
+ "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4319, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/0064cb4483aea62ced1d8ed67c8129497bb303cb.json b/data/output/0064cb4483aea62ced1d8ed67c8129497bb303cb.json new file mode 100644 index 0000000..463c320 --- /dev/null +++ b/data/output/0064cb4483aea62ced1d8ed67c8129497bb303cb.json @@ -0,0 +1,266 @@ +{ + "file_name": "0064cb4483aea62ced1d8ed67c8129497bb303cb.txt", + "total_words": 629, + "top_n_words": [ + { + "word": "the", + "count": 33 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "of", + "count": 17 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "band", + "count": 12 + }, + { + "word": "i", + "count": 10 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "day", + "count": 8 + }, + { + "word": "green", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "(CNN) -- Ah, fandom, it's a curious and wonderful thing.", + "length": 56 + }, + { + "text": "\"21st Century Breakdown\" has an almost steampunk feel to it.", + "length": 60 + }, + { + "text": "Maybe an Easter egg is hiding in the shadows for us to discover?", + "length": 64 + }, + { + "text": "From what I've seen, even veteran Rock Band players will need them.", + "length": 67 + }, + { + "text": "Fans both new and old will 
enjoy playing along with their favorite songs.", + "length": 73 + }, + { + "text": "Now we can also tell our friends, \"yeah, I five-starred 'Brain Stew/Jaded!", + "length": 74 + }, + { + "text": "Sore arms and scratchy voice aside, \"Green Day: Rock Band\" is a lot of fun.", + "length": 75 + }, + { + "text": "There are also unique drum lessons written specifically for this version of the game.", + "length": 85 + }, + { + "text": "It's almost like being at a concert -- I nearly forgot I was supposed to start singing.", + "length": 87 + }, + { + "text": "Also notable is that the band's clothing choices match the theme and era of each album.", + "length": 87 + }, + { + "text": "As with most of the \"Rock Band\" games, when you earn four stars or more on a song, you get a reward.", + "length": 100 + }, + { + "text": "Most of the songs included in the game are ones that fans of Green Day can listen to again and again.", + "length": 101 + }, + { + "text": "But with so many other versions of \"Rock Band\" out there, why would you want to spend money on this game?", + "length": 105 + }, + { + "text": "Well, for starters, any fanboy or fangirl will love the loading screens with Green Day specific graphics and sounds.", + "length": 116 + }, + { + "text": "In the Oakland venue the band is decked out in waistcoats, trousers and pin-striped shirts to match the vibe of the album.", + "length": 122 + }, + { + "text": "You also get to view cool memorabilia, like still photos and rare video footage of the band, for completing songs in career mode.", + "length": 129 + }, + { + "text": "When the game was released on Tuesday, I couldn't wait to get home and see if I could play like Tre Cool or sing like Billie Joe Armstrong.", + "length": 139 + }, + { + "text": "The graphics are really well done in this game and the motion-capture technique used to animate the Green Day doppelganger is pretty impressive.", + "length": 144 + }, + { + "text": "When it was announced last year that we 
would be getting a Green Day version of \"Rock Band,\" fans of the band (myself included) were pretty excited.", + "length": 148 + }, + { + "text": "We may not all be able to unlock achievements like \"It's All Fun Until Someone Gets Hurt\" or \"Louder Than Bombs or Eternity,\" but we'll have fun trying.", + "length": 152 + }, + { + "text": "My other \"band mate\" was busy clacking away on his guitar and wouldn't have been able to sing if he tried, nor would I when behind my electronic drum kit.", + "length": 154 + }, + { + "text": "The band that helped turn the '90s punk-rock revival into a more mainstream, pop-radio movement would be getting some major attention in the digital world.", + "length": 155 + }, + { + "text": "All the members of Green Day play an instrument and sing at the same time, which is no small feat given the technical difficulty of the bass lines and Tre Cool's blazing fast drumming speed.", + "length": 190 + }, + { + "text": "When the piano opening to \"Viva La Gloria (Little Girl)\" starts playing, Billie Joe encourages the audience to clap and fakes surprise when the piano stops before the song kicks into high gear.", + "length": 193 + }, + { + "text": "In this case, you get \"cred\" instead of \"fans\" and with that the ability to open up more sets with tougher songs like \"Peacemaker\" from \"21st Century Breakdown\" -- a fun, fast-paced song about death and destruction.", + "length": 215 + }, + { + "text": "' \" Hardcore \"Rock Band\" fans have posted videos on YouTube of themselves playing in expert mode and achieving five gold stars, which is something that not even the members of Green Day were able to do, according to a recent interview with MTV.", + "length": 244 + }, + { + "text": "Playing songs like \"When I Come Around\" and \"Pulling Teeth\" brings back memories of college days, and yes, I will admit I wish they had put \"All By Myself\" in the game, because it's the only hidden track that Green Day has ever included on an 
album.", + "length": 249 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.53276526927948 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:41.174351428Z", + "first_section_created": "2025-12-23T09:35:41.174686342Z", + "last_section_published": "2025-12-23T09:35:41.174841749Z", + "all_results_received": "2025-12-23T09:35:41.23810926Z", + "output_generated": "2025-12-23T09:35:41.238295468Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:41.174686342Z", + "publish_time": "2025-12-23T09:35:41.174841749Z", + "first_worker_start": "2025-12-23T09:35:41.175397271Z", + "last_worker_end": "2025-12-23T09:35:41.236686Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:41.175379971Z", + "start_time": "2025-12-23T09:35:41.175453874Z", + "end_time": "2025-12-23T09:35:41.175530877Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:41.175564Z", + "start_time": "2025-12-23T09:35:41.175691Z", + "end_time": "2025-12-23T09:35:41.236686Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:41.175391271Z", + "start_time": "2025-12-23T09:35:41.175443173Z", + "end_time": "2025-12-23T09:35:41.175516476Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:41.175338769Z", + "start_time": "2025-12-23T09:35:41.175397271Z", + "end_time": "2025-12-23T09:35:41.175431473Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + 
"total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3409, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/0064f5f840e6ed73d524798498c433eb8817b8b9.json b/data/output/0064f5f840e6ed73d524798498c433eb8817b8b9.json new file mode 100644 index 0000000..162bde2 --- /dev/null +++ b/data/output/0064f5f840e6ed73d524798498c433eb8817b8b9.json @@ -0,0 +1,354 @@ +{ + "file_name": "0064f5f840e6ed73d524798498c433eb8817b8b9.txt", + "total_words": 612, + "top_n_words": [ + { + "word": "the", + "count": 44 + }, + { + "word": "of", + "count": 29 + }, + { + "word": "in", + "count": 24 + }, + { + "word": "a", + "count": 21 + }, + { + "word": "on", + "count": 13 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "women", + "count": 10 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "bodies", + "count": 8 + }, + { + "word": "baghdad", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "A .", + "length": 3 + }, + { + "text": "In .", + 
"length": 4 + }, + { + "text": "'We .", + "length": 5 + }, + { + "text": "inside.", + "length": 7 + }, + { + "text": "Shiite .", + "length": 8 + }, + { + "text": "Police .", + "length": 8 + }, + { + "text": "' While .", + "length": 9 + }, + { + "text": "Officers .", + "length": 10 + }, + { + "text": "prostitution.", + "length": 13 + }, + { + "text": "down the stairs.", + "length": 16 + }, + { + "text": "floor of bathroom.", + "length": 18 + }, + { + "text": "and northern Iraq a month ago.", + "length": 30 + }, + { + "text": "cupboard in the kitchen shot to death there.", + "length": 44 + }, + { + "text": "MailOnline has decided not to publish these images.", + "length": 51 + }, + { + "text": "In another the bodies of eight women are seen lying .", + "length": 53 + }, + { + "text": "sole access point to the building has been blocked off.", + "length": 55 + }, + { + "text": "in a pool of blood in what appears to be a living room.", + "length": 55 + }, + { + "text": "the killings were carried out by Shiite militants opposed to .", + "length": 62 + }, + { + "text": "have now cordoned off the street, with residents reporting that the .", + "length": 69 + }, + { + "text": "scrawling 'this is the fate of any prostitution' on one of the doors.", + "length": 69 + }, + { + "text": "entered a flat and found bodies everywhere, some lying on the sofa, .", + "length": 69 + }, + { + "text": "one image the bodies of five women are seen slumped together on the .", + "length": 69 + }, + { + "text": "up the stairs, we saw a couple of women's bodies and blood streaming .", + "length": 70 + }, + { + "text": "believe men using silenced weapons carried out the executions, before .", + "length": 71 + }, + { + "text": "Police arrived at the building to find blood streaming down the stairs.", + "length": 71 + }, + { + "text": "militants led by jihadist fighters took over large swathes of eastern .", + "length": 71 + }, + { + "text": "Police arrived at the building to find blood streaming 
down the stairs.", + "length": 71 + }, + { + "text": "militias have become more active on the streets of Baghdad since Sunni .", + "length": 72 + }, + { + "text": "police officer speaking on condition of anonymity said: 'When we walked .", + "length": 73 + }, + { + "text": "some on the ground, and one woman who apparently had tried to hide in a .", + "length": 73 + }, + { + "text": "no group has yet claimed responsibility for the murders, locals believe .", + "length": 73 + }, + { + "text": "Most of the victims appear to have been killed by gunshost wounds to the head.", + "length": 78 + }, + { + "text": "Inside they discovered the bodies of dozens of women scattered around two apartments.", + "length": 85 + }, + { + "text": "Inside they discovered the bodies of dozens of women scattered around two apartments .", + "length": 86 + }, + { + "text": "apartments in they city's Zayouna district on Saturday evening before massacring everyone .", + "length": 91 + }, + { + "text": "Gunmen wearing camouflage and carrying weapons fitted with silencers reportedly burst into two .", + "length": 96 + }, + { + "text": "A message scrawled on a wall inside one of the apartments read: 'This is the fate of any prostitution.", + "length": 102 + }, + { + "text": "While nobody has claimed responsibility for the attack, it is thought Shiite militants are behind it .", + "length": 102 + }, + { + "text": "Gunmen in Baghdad have executed at least 29 people including 20 women in an apparent raid on a brothel .", + "length": 104 + }, + { + "text": "Horrific pictures have emerged showing the bodies of some of the 25 women murdered in a raid on a Baghdad brothel.", + "length": 114 + }, + { + "text": "The sudden assault by ISIS earlier this year lead marches by Shiite Muslims in Iraq's capital in a show of defiance.", + "length": 116 + }, + { + "text": "' The attacks happened late on Saturday night at an apartment block in , a mixed Sunni and Shiite district of Baghdad.", + "length": 118 + }, + 
{ + "text": "The gunmen, who are believed to be Shia militants, reportedly burst into two of these apartments murdering everbody inside .", + "length": 124 + }, + { + "text": "Victims: Police stand over the bodies of some of the 25 women murdered during a raid on a brothel in a upmarket district of Baghdad .", + "length": 133 + }, + { + "text": "The deaths happened in the Zayouna district of Baghdad, the same district where seven women and five men were shot in a similar attack in May last year .", + "length": 153 + }, + { + "text": "The latest attacks in Baghdad mirror executions carried out by Shiite militias in May 2013 in the same district, when gunmen attacked brothels and alcohol shops.", + "length": 161 + }, + { + "text": "Sunni ISIS fighters have seized most of the country's north, including all of its borders with Syria, in an attempt to set up a single Islamic state across country lines.", + "length": 170 + }, + { + "text": "Violence is at its highest level in Iraq in recent years according to the UN, with 2,417 people killed in June, the majority of them civilians, the highest number since 2007.", + "length": 174 + }, + { + "text": "Seven women and five men were killed on that occasion after gunmen burst into a brothel, while 12 shopkeepers died after militants restrained a nearby police officer before shooting them.", + "length": 187 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8639473915100098 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:41.675571618Z", + "first_section_created": "2025-12-23T09:35:41.675872631Z", + "last_section_published": "2025-12-23T09:35:41.676037138Z", + "all_results_received": "2025-12-23T09:35:41.737758785Z", + "output_generated": "2025-12-23T09:35:41.737941593Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": 
"2025-12-23T09:35:41.675872631Z", + "publish_time": "2025-12-23T09:35:41.676037138Z", + "first_worker_start": "2025-12-23T09:35:41.676735566Z", + "last_worker_end": "2025-12-23T09:35:41.736892Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:41.676696765Z", + "start_time": "2025-12-23T09:35:41.676765668Z", + "end_time": "2025-12-23T09:35:41.67682157Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:41.676886Z", + "start_time": "2025-12-23T09:35:41.677012Z", + "end_time": "2025-12-23T09:35:41.736892Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:41.676711065Z", + "start_time": "2025-12-23T09:35:41.676755467Z", + "end_time": "2025-12-23T09:35:41.67682347Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:41.676658763Z", + "start_time": "2025-12-23T09:35:41.676735566Z", + "end_time": "2025-12-23T09:35:41.676772968Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3612, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/006537b761457f62b4f9602dfc149f93a730d562.json b/data/output/006537b761457f62b4f9602dfc149f93a730d562.json new file mode 100644 index 0000000..1b667d0 --- /dev/null +++ b/data/output/006537b761457f62b4f9602dfc149f93a730d562.json @@ -0,0 +1,278 @@ +{ + "file_name": "006537b761457f62b4f9602dfc149f93a730d562.txt", + "total_words": 669, + "top_n_words": [ + { + "word": "the", + "count": 23 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "but", + "count": 12 + }, + { + "word": "he", + "count": 12 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "it", + "count": 11 + }, + { + "word": "his", + "count": 10 + }, + { + "word": "is", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "‘But he is always being criticised.", + "length": 37 + }, + { + "text": "I’m sure he will return to his best.", + "length": 38 + }, + { + "text": "‘Diego has made a huge impact,’ he said.", + "length": 44 + }, + { + "text": "‘I have a very good relationship with both players.", + "length": 53 + }, + { + "text": "That’s very important for Chelsea but not that good for us.", + "length": 61 + }, + { + "text": "‘It is not easy to adapt but he has clearly done it very quickly.", + "length": 67 + }, + { + "text": "‘He’s scored seven times so far so that record speaks for itself.", + "length": 69 + }, + { + "text": "But we’ve got a good defence and I believe we can stop him on Sunday.", + "length": 71 + }, + { 
+ "text": "‘I’ve known Cesc since we were 16-years-old and were in the Spanish youth squads together.", + "length": 94 + }, + { + "text": "‘Maybe some of you think it’s strange why he is not in the best moment,’ said Pellegrini.", + "length": 95 + }, + { + "text": "Silva added: ‘We need to win because otherwise the gap would be getting too big between the clubs.", + "length": 100 + }, + { + "text": "‘But it’s really all about how the team plays, and our aim as always is to go out and win the game.", + "length": 103 + }, + { + "text": "‘There are a lot of human things and maybe that is why he is not in his best performance at the moment.", + "length": 105 + }, + { + "text": "Costa scored a hat-trick against Swansea City last weekend and is already on seven goals in a blue shirt .", + "length": 106 + }, + { + "text": "David Silva has backed Manchester City to stop Chelsea’s £32million goal machine Diego Costa in his tracks.", + "length": 110 + }, + { + "text": "David Silva has backed Manchester City to stop Chelsea’s £32million goal machine Diego Costa in his tracks .", + "length": 111 + }, + { + "text": "I think as a team, as a coach, as a club we must support him because he is a very important player and makes a difference.", + "length": 122 + }, + { + "text": "’ Costa’s impact has seen Chelsea set the early-season pace and they can move eight points ahead of City if they win tomorrow.", + "length": 130 + }, + { + "text": "I think Chelsea have made some fantastic signings this summer but our friendship will be set aside because we are going out to win.", + "length": 131 + }, + { + "text": "It is very easy to say he should score 20 goals but must also defend and have better pace, but I don’t think Yaya has any problem.", + "length": 132 + }, + { + "text": "The two title favourites clash at the Etihad on Sunday when City playmaker Silva will come up against his Spain teammates Costa and Cesc Fabregas.", + "length": 146 + }, + { + "text": "‘But we started the season 
and maybe he had a lot of personal problems with his brother – not what was said in the media, but in what he feels about it.", + "length": 156 + }, + { + "text": "’ Pellegrini added that new £32m centre-back Eliaquim Mangala has not made his debut yet because ‘he needs some time to understand the way we defend’.", + "length": 157 + }, + { + "text": "It followed a turbulent summer in which Toure accused the club of not giving him compassionate leave to see his 28-year-old brother who died of cancer in June.", + "length": 159 + }, + { + "text": "David Silva was speaking as an ambassador for Anfi, a luxury holiday company based in Gran Canaria who are leading a campaign called ‘A new home for Bailey’.", + "length": 161 + }, + { + "text": "Silva was talking at an event to raise awareness about Pallister Killian Syndrome (PKS) Costa should be back in the Chelsea starting XI when the teams face each other on Sunday .", + "length": 179 + }, + { + "text": "It aims to raise £45,000 so the family of Bailey Stewart, a two-year-old boy with a rare condition that affects only five people in the UK, can adapt their home to meet his special needs.", + "length": 188 + }, + { + "text": "Costa has scored seven goals in four Premier League games since joining Chelsea from Atletico Madrid, but Silva is confident that that champions can become the first team to shut him out in the league.", + "length": 201 + }, + { + "text": "‘I’m looking forward to facing him but in pretty much every game now I seem to be up against a Spanish teammate – when we play Arsenal it’s Santi (Cazorla) or when it’s United it will be Juan Mata.", + "length": 207 + }, + { + "text": "’ Meanwhile, City boss Manuel Pellegrini believes the death of Yaya Toure’s younger brother Ibrahim could be behind his loss of form after the Ivory Coast midfielder was widely criticised following the Champions League defeat to Bayern Munich in midweek.", + "length": 258 + } + ], + "sentiment": { + "sentiment": "positive", + 
"score": 0.5036607384681702 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:42.176784108Z", + "first_section_created": "2025-12-23T09:35:42.177090221Z", + "last_section_published": "2025-12-23T09:35:42.177255328Z", + "all_results_received": "2025-12-23T09:35:42.23545493Z", + "output_generated": "2025-12-23T09:35:42.235593236Z", + "total_processing_time_ms": 58, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:42.177090221Z", + "publish_time": "2025-12-23T09:35:42.177255328Z", + "first_worker_start": "2025-12-23T09:35:42.177660244Z", + "last_worker_end": "2025-12-23T09:35:42.234525Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:42.177728347Z", + "start_time": "2025-12-23T09:35:42.177773449Z", + "end_time": "2025-12-23T09:35:42.177838652Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:42.177988Z", + "start_time": "2025-12-23T09:35:42.178123Z", + "end_time": "2025-12-23T09:35:42.234525Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:42.177666244Z", + "start_time": "2025-12-23T09:35:42.177732147Z", + "end_time": "2025-12-23T09:35:42.177836251Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:42.177601042Z", + "start_time": "2025-12-23T09:35:42.177660244Z", + "end_time": "2025-12-23T09:35:42.177702646Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + 
"max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3617, + "slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/0065766f2945b8361ad0fb10d8141e964f7791b4.json b/data/output/0065766f2945b8361ad0fb10d8141e964f7791b4.json new file mode 100644 index 0000000..0d59aaf --- /dev/null +++ b/data/output/0065766f2945b8361ad0fb10d8141e964f7791b4.json @@ -0,0 +1,246 @@ +{ + "file_name": "0065766f2945b8361ad0fb10d8141e964f7791b4.txt", + "total_words": 408, + "top_n_words": [ + { + "word": "the", + "count": 20 + }, + { + "word": "to", + "count": 14 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "will", + "count": 10 + }, + { + "word": "gaal", + "count": 8 + }, + { + "word": "giggs", + "count": 8 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "training", + "count": 8 + }, + { + "word": "united", + "count": 8 + }, + { + "word": "van", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "David Kent .", + "length": 12 + }, + { + "text": "VIDEO United looking to add Vidal .", 
+ "length": 35 + }, + { + "text": "They hope to have Van Gaal in charge by this time.", + "length": 50 + }, + { + "text": "Watching on: Giggs took charge of the final four games of last season .", + "length": 71 + }, + { + "text": "Trusted aid: Albert Stuivenberg has been added to United's backroom team .", + "length": 74 + }, + { + "text": "Marching on: Louis van Gaal has guided Holland to the World Cup semi-finals .", + "length": 77 + }, + { + "text": "VIDEO Scroll down to watch Giggs describing his farewell as caretaker manager .", + "length": 79 + }, + { + "text": "Man in charge: Ryan Giggs will take pre-season training in Louis van Gaal's absence .", + "length": 85 + }, + { + "text": "The Dutchman will officially takeover at United once Holland's World Cup involvement is over.", + "length": 93 + }, + { + "text": "Man in charge: Giggs will over see training for the players who have not been to the World Cup .", + "length": 96 + }, + { + "text": "Scouting trip: Paul Scholes, Gary Neville and Ryan Giggs pictured watching Salford City Fc on Saturday .", + "length": 104 + }, + { + "text": "Ryan Giggs will take charge of Manchester United when the players return for pre-season training on Monday.", + "length": 107 + }, + { + "text": "Van Gaal has already brought in Frans Hoek and Marcel Bout at Old Trafford as he continues to re-mould the set-up.", + "length": 114 + }, + { + "text": "The 53-year-old will be expected to help the youngsters develop at United and be Van Gaal's eyes and ears on the training pitch.", + "length": 128 + }, + { + "text": "Players such as Wayne Rooney, Antonio Valencia and Javier Hernandez, who all featured at the World Cup, will be giving extra time off.", + "length": 134 + }, + { + "text": "Stuivenberg has worked with Van Gaal as Holland Under 21 boss and he has recommended many youngsters who have since moved up to the senior team.", + "length": 144 + }, + { + "text": "United’s players who did not feature at the World Cup will return to 
the Aon Training Complex in Carrington for the start of pre-season training.", + "length": 147 + }, + { + "text": "United travel to America at the end of this month to play a number of high-profile friendlies against LA Galaxy, Roma, Inter Milan and Real Madrid.", + "length": 147 + }, + { + "text": "Giggs – who was announced as Louis van Gaal’s No 2 – will be assisted by Dutchman Albert Stuivenberg who has just been added to the new look backroom team.", + "length": 161 + }, + { + "text": "The likes of Jonny Evans, Darren Fletcher and Ashley Young will all report for training along with Wilfried Zaha and Nick Powell who were sent out on loan last season.", + "length": 167 + }, + { + "text": "New United boss Van Gaal is still in Brazil preparing for Holland’s World Cup semi-final with Argentina and will leave the early training sessions to Giggs, who took charge the final four games of last season following David Moyes’ sacking.", + "length": 244 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.6238908171653748 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:42.678025299Z", + "first_section_created": "2025-12-23T09:35:42.679553962Z", + "last_section_published": "2025-12-23T09:35:42.679725469Z", + "all_results_received": "2025-12-23T09:35:42.741456817Z", + "output_generated": "2025-12-23T09:35:42.741643125Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:42.679553962Z", + "publish_time": "2025-12-23T09:35:42.679725469Z", + "first_worker_start": "2025-12-23T09:35:42.680216489Z", + "last_worker_end": "2025-12-23T09:35:42.740623Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:42.68022339Z", + "start_time": "2025-12-23T09:35:42.680297393Z", + 
"end_time": "2025-12-23T09:35:42.680361895Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:42.680436Z", + "start_time": "2025-12-23T09:35:42.680584Z", + "end_time": "2025-12-23T09:35:42.740623Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:42.680206089Z", + "start_time": "2025-12-23T09:35:42.680256291Z", + "end_time": "2025-12-23T09:35:42.680316894Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:42.680151687Z", + "start_time": "2025-12-23T09:35:42.680216489Z", + "end_time": "2025-12-23T09:35:42.68023989Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2294, + 
"slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/00657a4cd247ea033769f76e67a6877c85272366.json b/data/output/00657a4cd247ea033769f76e67a6877c85272366.json new file mode 100644 index 0000000..98cbde6 --- /dev/null +++ b/data/output/00657a4cd247ea033769f76e67a6877c85272366.json @@ -0,0 +1,398 @@ +{ + "file_name": "00657a4cd247ea033769f76e67a6877c85272366.txt", + "total_words": 755, + "top_n_words": [ + { + "word": "dunn", + "count": 19 + }, + { + "word": "s", + "count": 19 + }, + { + "word": "the", + "count": 19 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "her", + "count": 16 + }, + { + "word": "hailey", + "count": 15 + }, + { + "word": "said", + "count": 14 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "to", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": ".", + "length": 1 + }, + { + "text": "B.", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "In .", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "also.", + "length": 5 + }, + { + "text": "‘I .", + "length": 6 + }, + { + "text": "‘I've .", + "length": 9 + }, + { + "text": "‘That .", + "length": 9 + }, + { + "text": "Shortly .", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "She is my child.", + "length": 16 + }, + { + "text": "‘It still does.", + "length": 17 + }, + { + "text": "Hailey is my baby.", + "length": 18 + }, + { + "text": "This is my daughter.", + "length": 20 + }, + { + "text": "19:06 EST, 21 May 2013 .", + "length": 24 + }, + { + "text": "17:20 EST, 21 May 2013 .", + "length": 24 + }, + { + "text": "kept my hope the whole time.", + "length": 28 + }, + { + "text": "’ No arrests have been made.", + "length": 30 + }, + { + "text": "That would really drive me crazy.", + "length": 33 + }, + { + "text": "‘That would just worry me sick.", + "length": 33 + }, 
+ { + "text": "he could be involved,’ she said.", + "length": 34 + }, + { + "text": "I did,’ the 36-year-old mother said.", + "length": 38 + }, + { + "text": "‘Now it's real and now he accepts it.", + "length": 39 + }, + { + "text": "Police found Dunn's remains near Lake J.", + "length": 40 + }, + { + "text": "‘I know there are other possibilities .", + "length": 41 + }, + { + "text": "Daily Mail Reporter and Associated Press Reporter .", + "length": 51 + }, + { + "text": "‘That question has always disgusted me,’ she said.", + "length": 54 + }, + { + "text": "fear that she was dead, I put it way in the back of my mind.", + "length": 60 + }, + { + "text": "’ Adkins has denied involvement in Hailey's disappearance.", + "length": 60 + }, + { + "text": "’ Officials have not released a manner and cause of death.", + "length": 60 + }, + { + "text": "daughter wondering why her mother and father hadn't found her.", + "length": 62 + }, + { + "text": "‘They're being very meticulous,’ Dunn said of forensic experts.", + "length": 67 + }, + { + "text": "the first public comments since Hailey's remains were identified, .", + "length": 67 + }, + { + "text": "time, Shawn Adkins, a person of interest, but he has not been charged.", + "length": 70 + }, + { + "text": "after her disappearance, police called Dunn's live-in boyfriend at the .", + "length": 72 + }, + { + "text": "Billie Jean Dunn said possibly the most painful part was picturing her .", + "length": 72 + }, + { + "text": "learned a lot over the past few months and I feel pretty strongly that .", + "length": 72 + }, + { + "text": "Thomas in west Texas but they have yet released a manner and cause of death .", + "length": 77 + }, + { + "text": "Dunn and Adkins split up early last year and last had email contact in August.", + "length": 78 + }, + { + "text": "‘They're doing everything they know that they need to do to collect any possible evidence.", + "length": 92 + }, + { + "text": "’ Hailey's brother, 18-year-old David 
Dunn, is ‘very angry and very sad,’ his mom said.", + "length": 93 + }, + { + "text": "Hailey's brother, 18-year-old David sobbed throughout a memorial service held Sunday at his sister's middle school .", + "length": 116 + }, + { + "text": "Dunn said authorities still have Hailey's remains, which she plans to cremate once they're turned over to the family.", + "length": 117 + }, + { + "text": "Hailey Darlene Dunn was reported missing by her mom in December 2010 in Colorado City, about 250 miles west of Dallas.", + "length": 118 + }, + { + "text": "Remains found in March near at a West Texas lake about 20 miles away were identified last month as belonging to Hailey.", + "length": 119 + }, + { + "text": "’ Dunn said it's ‘absurd’ for people to question her having anything to do with her daughter's disappearance or death.", + "length": 124 + }, + { + "text": "He sobbed throughout a memorial service held Sunday at Hailey's middle school where hundreds came to bid farewell to the girl.", + "length": 126 + }, + { + "text": "From the start, Dunn said, her worry was that a ‘random weirdo’ might have pulled off nearby Interstate 20 and abducted Hailey.", + "length": 131 + }, + { + "text": "Hailey Darlene Dunn's remains were found in March near a West Texas lake about 20 miles away from where she disappeared in December 2010 .", + "length": 138 + }, + { + "text": "‘He couldn't stand the sight of a “missing” poster (about Hailey), of course, because it made it too real for him,’ Billie Jean Dunn said.", + "length": 146 + }, + { + "text": "’ Dunn said that while she got some answers from finally knowing where Hailey is, she ‘won't have full closure unless there's been an arrest made.", + "length": 150 + }, + { + "text": "Hailey Darlene Dunn (left), seen here with her mother and her mother's boyfriend, went missing three days after this Christmas picture was taken in 2010 .", + "length": 154 + }, + { + "text": "' Tracy Hopper comforts her daughter, Maci Hopper, 13, as they 
remember Hailey Dunn during a memorial service on Sunday at Colorado Middle School in Colorado City, Texas .", + "length": 171 + }, + { + "text": "Mother Billie Jean Dunn, right, and other family members grieve the teenager's loss during a public memorial service Sunday at Colorado Middle School in Colorado City, Texas .", + "length": 175 + }, + { + "text": "’ Dunn's attorney, John Young, said authorities have told him investigators are looking at ‘other cases that may in fact be related or may be the same type to the very specific facts of this case.", + "length": 200 + }, + { + "text": "‘Things you don't think of normally just run through your mind, like what is somebody doing to her, what is she having to live through,’ said Dunn, who works as an administrative assistant in Austin.", + "length": 203 + }, + { + "text": "The mother of a 13-year-old West Texas cheerleader whose body was found in March more than two years after she first went missing, has spoken about her concerns that her ex-boyfriend may have been involved in her daughter’s death.", + "length": 232 + }, + { + "text": "Speaking for the first time since her daughter’s remain were found, Billie Jean Dunn said she had spent the past two years fearing the worst, including that the girl was chained and tortured or had fallen victim to human traffickers.", + "length": 235 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5345779061317444 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:43.180505341Z", + "first_section_created": "2025-12-23T09:35:43.180895957Z", + "last_section_published": "2025-12-23T09:35:43.181150968Z", + "all_results_received": "2025-12-23T09:35:43.247689514Z", + "output_generated": "2025-12-23T09:35:43.247900023Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": 
"2025-12-23T09:35:43.180895957Z", + "publish_time": "2025-12-23T09:35:43.181150968Z", + "first_worker_start": "2025-12-23T09:35:43.181619187Z", + "last_worker_end": "2025-12-23T09:35:43.246777Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:43.181582285Z", + "start_time": "2025-12-23T09:35:43.181653688Z", + "end_time": "2025-12-23T09:35:43.181735092Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:43.181788Z", + "start_time": "2025-12-23T09:35:43.181928Z", + "end_time": "2025-12-23T09:35:43.246777Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:43.181642888Z", + "start_time": "2025-12-23T09:35:43.181721691Z", + "end_time": "2025-12-23T09:35:43.181835296Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:43.181562685Z", + "start_time": "2025-12-23T09:35:43.181619187Z", + "end_time": "2025-12-23T09:35:43.181653788Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4321, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/0065c7b90906d4cedb0b0a4274b6adc22ddef9d8.json b/data/output/0065c7b90906d4cedb0b0a4274b6adc22ddef9d8.json new file mode 100644 index 0000000..51f7a57 --- /dev/null +++ b/data/output/0065c7b90906d4cedb0b0a4274b6adc22ddef9d8.json @@ -0,0 +1,402 @@ +{ + "file_name": "0065c7b90906d4cedb0b0a4274b6adc22ddef9d8.txt", + "total_words": 534, + "top_n_words": [ + { + "word": "the", + "count": 33 + }, + { + "word": "a", + "count": 20 + }, + { + "word": "was", + "count": 17 + }, + { + "word": "freddie", + "count": 12 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "said", + "count": 11 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "he", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "In .", + "length": 4 + }, + { + "text": "We .", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "said.", + "length": 5 + }, + { + "text": "She .", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "’ Mr .", + "length": 8 + }, + { + "text": "earlier.", + "length": 8 + }, + { + "text": "her eye.", + "length": 8 + }, + { + "text": "Freddie, .", + "length": 10 + }, + { + "text": "It is hell.", + "length": 11 + }, + { + "text": "I didn’t .", + "length": 12 + }, + { + "text": "The teacher .", + "length": 13 + }, + { + "text": "than jogging.", + "length": 13 + }, + { + "text": "’ Freddie .", + "length": 13 + }, + { + "text": "Napper’s car.", + "length": 15 
+ }, + { + "text": "‘We don’t .", + "length": 15 + }, + { + "text": "’ At the same .", + "length": 17 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "have got to live with it.", + "length": 25 + }, + { + "text": "not to be there,’ he said.", + "length": 28 + }, + { + "text": "seconds before the collision.", + "length": 29 + }, + { + "text": "‘They can’t bear for him .", + "length": 30 + }, + { + "text": "‘She has got to live with it.", + "length": 31 + }, + { + "text": "car  being driven by a teacher.", + "length": 32 + }, + { + "text": "A boy aged ten was fatally injured .", + "length": 36 + }, + { + "text": "have been able to prevent the accident.", + "length": 39 + }, + { + "text": "feel angry towards the lady,’ he said.", + "length": 40 + }, + { + "text": "She said: ‘He was running a bit faster .", + "length": 42 + }, + { + "text": "teddy bear in a uniform and put it in his seat.", + "length": 47 + }, + { + "text": "30pm on a green near the family home in Didcot.", + "length": 47 + }, + { + "text": "see him prior to that due to the parked vehicles.", + "length": 49 + }, + { + "text": "‘It was the only thing I could do to try and avoid him.", + "length": 57 + }, + { + "text": "said the family did not blame Miss Napper for the tragedy.", + "length": 58 + }, + { + "text": "to prevent the accident, with parked vehicles blocking her view.", + "length": 64 + }, + { + "text": "‘I slammed on my brakes as soon as I saw him,’ Miss Napper .", + "length": 64 + }, + { + "text": "a keen Harry Potter and boxing fan, was taken to John Radcliffe .", + "length": 65 + }, + { + "text": "was hit by the front left wing of the car and thrown into the air.", + "length": 66 + }, + { + "text": "answered it as he ran out between parked cars, oblivious of Miss .", + "length": 66 + }, + { + "text": "Perry said pupils at Stephen Freeman Community Primary School in .", + "length": 66 + }, + { + "text": "She said  there were a lot of cars parked on the side of 
the road.", + "length": 67 + }, + { + "text": "to his ear when he disappeared around the side of a parked vehicle .", + "length": 68 + }, + { + "text": "Hospital in Oxford on September 10 last year, but died the next day.", + "length": 68 + }, + { + "text": "outside a school as he chatted on his mobile phone and did not see a .", + "length": 70 + }, + { + "text": "Didcot, Oxfordshire, where Freddie had been in Year 6, had dressed a .", + "length": 70 + }, + { + "text": "schoolboy was playing with a friend just yards from his home when he .", + "length": 70 + }, + { + "text": "The friend told police that Freddie received a call on his phone and .", + "length": 70 + }, + { + "text": "time Miss Napper was travelling down the road, having just left Didcot .", + "length": 72 + }, + { + "text": "was travelling at 20mph and said there was nothing she could have done .", + "length": 72 + }, + { + "text": "evidence, PC Naomi Hames said the friend saw Freddie holding the phone .", + "length": 72 + }, + { + "text": "The inquest in Oxford heard that Freddie was playing with a friend at 5.", + "length": 72 + }, + { + "text": "Speaking after the inquest, Freddie’s father, coach driver Lea Perry, .", + "length": 73 + }, + { + "text": "described seeing a young boy running into the road out of the corner of .", + "length": 73 + }, + { + "text": "Forensic collision investigator Andrew Evans said Miss Napper would not .", + "length": 73 + }, + { + "text": "was hit by Joanne Napper’s Nissan Micra, the inquest was told yesterday.", + "length": 74 + }, + { + "text": "Girls’ School where Freddie’s sister, Eloise, 12, had joined just a week .", + "length": 78 + }, + { + "text": "Tributes to Freddie Perry left at the side of the road where he was knocked down in September last year .", + "length": 105 + }, + { + "text": "A coroner heard that Freddie Perry was knocked down by the car, although the driver was exonerated of any blame.", + "length": 112 + }, + { + "text": "Tragic Freddie Perry 
was knocked down and killed while talking on his mobile phone near his home in Didcot, Oxfordshire .", + "length": 121 + }, + { + "text": "Recording a verdict of accidental death, Oxfordshire coroner Darren Salter said it was an ‘extremely tragic incident’.", + "length": 122 + }, + { + "text": "Freddie's family have since launched a road safety campaign to get a crossing built on the road and have raised thousands of pounds for charity .", + "length": 145 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7272036671638489 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:43.68194554Z", + "first_section_created": "2025-12-23T09:35:43.682323956Z", + "last_section_published": "2025-12-23T09:35:43.682555765Z", + "all_results_received": "2025-12-23T09:35:43.749803641Z", + "output_generated": "2025-12-23T09:35:43.749959348Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:43.682323956Z", + "publish_time": "2025-12-23T09:35:43.682555765Z", + "first_worker_start": "2025-12-23T09:35:43.683097688Z", + "last_worker_end": "2025-12-23T09:35:43.748872Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:43.683035885Z", + "start_time": "2025-12-23T09:35:43.683097688Z", + "end_time": "2025-12-23T09:35:43.68315939Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:43.683294Z", + "start_time": "2025-12-23T09:35:43.683453Z", + "end_time": "2025-12-23T09:35:43.748872Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:43.683047786Z", + "start_time": "2025-12-23T09:35:43.683126689Z", + "end_time": 
"2025-12-23T09:35:43.683202492Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:43.683046486Z", + "start_time": "2025-12-23T09:35:43.683123189Z", + "end_time": "2025-12-23T09:35:43.68314659Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2975, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/0065ff53616e8bf09d8f252ccc3cd293849a250d.json b/data/output/0065ff53616e8bf09d8f252ccc3cd293849a250d.json new file mode 100644 index 0000000..cf5bda1 --- /dev/null +++ b/data/output/0065ff53616e8bf09d8f252ccc3cd293849a250d.json @@ -0,0 +1,290 @@ +{ + "file_name": "0065ff53616e8bf09d8f252ccc3cd293849a250d.txt", + "total_words": 490, + "top_n_words": [ + { + "word": "the", + "count": 28 + }, + { + "word": "their", + 
"count": 13 + }, + { + "word": "in", + "count": 12 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "said", + "count": 8 + }, + { + "word": "band", + "count": 7 + }, + { + "word": "music", + "count": 7 + }, + { + "word": "on", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "11:36 EST, 8 November 2012 .", + "length": 28 + }, + { + "text": "10:09 EST, 8 November 2012 .", + "length": 28 + }, + { + "text": "reasons - to help promote his own book.", + "length": 39 + }, + { + "text": "'The success of this band is based on a .", + "length": 41 + }, + { + "text": "In a statement on the band's website Mr .", + "length": 41 + }, + { + "text": "Kastelruther Spatzen, from the German-speaking .", + "length": 48 + }, + { + "text": "'The band of fakes must give back their Echo awards,' he said.", + "length": 62 + }, + { + "text": "manager who said using studio musicians were 'common practice'.", + "length": 63 + }, + { + "text": "giant fraud,' former producer Walter Widemair told Bild newspaper.", + "length": 66 + }, + { + "text": "province of South Tyrol in northern Italy, were defended by their .", + "length": 67 + }, + { + "text": "for nearly 30 years and was now betraying the band for commercial .", + "length": 67 + }, + { + "text": "Brossmann said Widemair had been responsible for the studio recordings .", + "length": 72 + }, + { + "text": "'In nearly 30 years of concert tours, they always played and sang everything live.", + "length": 82 + }, + { + "text": "They were stripped of their 1990 Grammy Award and their management faced over 20 lawsuits.", + "length": 90 + }, + { + "text": "He said that only the lead singer's voice was genuine on the 
albums and called for the group to return their prices.", + "length": 116 + }, + { + "text": "In his defence of the popular folk-music band, he said Kastelruther Spatzen had even listed the studio musicians on their albums.", + "length": 129 + }, + { + "text": "'Fake': Folk music favourites Kastelruther Spatzen are being accused of 'faking music' after it emerged they use studio musicians .", + "length": 131 + }, + { + "text": "' Praised: Kastelruther Spatzen after winning Best Folk Music at the Echo Awards in 2007, an award which may now be taken from them .", + "length": 133 + }, + { + "text": "The revelations have made the headlines in Germany echoing the scandal surrounding early 90s lip-syncing pop-sensation Milli Vanilli.", + "length": 133 + }, + { + "text": "German producer Frank Farian turned two young Munich models, Robert Pilatus and Fabrice Morvan, into the group known as Milli Vanilli.", + "length": 134 + }, + { + "text": "The seven-man Kastelruther Spatzen used studio musicians to create their famous Alpine brass-band sound, their former producer claims.", + "length": 134 + }, + { + "text": "A German-language folk band have been asked to return 13 awards after it emerged that they have been 'faking' the music on their albums.", + "length": 136 + }, + { + "text": "Kastelruther Spatzen's manager Helmut Brossmann said that it was common practice for folk music bands to use studio musicians for their albums.", + "length": 144 + }, + { + "text": "The dispute has received much media attention in Germany, with newspapers recalling the scandal of their fellow countrymen in pop duo Milli Vanilli.", + "length": 148 + }, + { + "text": "'The names of the studio musician and everyone who contributed to the chorus of songs was mentioned in every CD that they were involved in,' said Brossmann.", + "length": 156 + }, + { + "text": "Retro fake: Late 80s German pop duo Milli Vanilli were discovered to have been lip-syncing to others vocals and were forced to return 
their 1990 Grammy Award .", + "length": 159 + }, + { + "text": "Real thing: Their former producer said lead singer Norbert Rier, seen here accepting a Folk Music award in Germany earlier this year, was the only 'genuine' voice on the albums .", + "length": 178 + }, + { + "text": "The group had great international success in the late 80s and early 90s with songs such as 'Girl You Know It's True' and 'Blame It On The Rain', but were disgraced when it emerged that they had been miming to other people's vocals.", + "length": 231 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4949769377708435 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:44.183694052Z", + "first_section_created": "2025-12-23T09:35:44.183992764Z", + "last_section_published": "2025-12-23T09:35:44.184208673Z", + "all_results_received": "2025-12-23T09:35:44.249992289Z", + "output_generated": "2025-12-23T09:35:44.250160496Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:44.183992764Z", + "publish_time": "2025-12-23T09:35:44.184208673Z", + "first_worker_start": "2025-12-23T09:35:44.184702794Z", + "last_worker_end": "2025-12-23T09:35:44.249097Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:44.184678693Z", + "start_time": "2025-12-23T09:35:44.184754396Z", + "end_time": "2025-12-23T09:35:44.184811398Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:44.184931Z", + "start_time": "2025-12-23T09:35:44.185076Z", + "end_time": "2025-12-23T09:35:44.249097Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": 
"2025-12-23T09:35:44.184735995Z", + "start_time": "2025-12-23T09:35:44.184801698Z", + "end_time": "2025-12-23T09:35:44.184874801Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:44.184652292Z", + "start_time": "2025-12-23T09:35:44.184702794Z", + "end_time": "2025-12-23T09:35:44.184728795Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2937, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/006652b4ba431e44baec92d84fe62c4727983642.json b/data/output/006652b4ba431e44baec92d84fe62c4727983642.json new file mode 100644 index 0000000..85c85a9 --- /dev/null +++ b/data/output/006652b4ba431e44baec92d84fe62c4727983642.json @@ -0,0 +1,314 @@ +{ + "file_name": "006652b4ba431e44baec92d84fe62c4727983642.txt", + 
"total_words": 765, + "top_n_words": [ + { + "word": "the", + "count": 51 + }, + { + "word": "in", + "count": 25 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "of", + "count": 21 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "ukraine", + "count": 17 + }, + { + "word": "russian", + "count": 14 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "s", + "count": 10 + }, + { + "word": "said", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "Targeted sanctions .", + "length": 20 + }, + { + "text": "In a phone call with U.", + "length": 23 + }, + { + "text": "Inspectors seized in Slavyansk .", + "length": 32 + }, + { + "text": "President Barack Obama threatened Russia with new sanctions.", + "length": 60 + }, + { + "text": "-- Yatsenyuk met with Pope Francis while in Rome on Saturday.", + "length": 61 + }, + { + "text": "\"We urge Russia to leave us alone,\" he said in televised remarks.", + "length": 65 + }, + { + "text": "Developments in Ukraine have come at a rapid pace in recent days: .", + "length": 67 + }, + { + "text": "On Saturday, the fate of the military inspectors preoccupied world leaders.", + "length": 75 + }, + { + "text": "\" Many eastern Ukraine residents have Russian roots and sympathize with Moscow.", + "length": 79 + }, + { + "text": "The meeting has been seen as a sign of support from the Vatican for his government.", + "length": 83 + }, + { + "text": "CNN's Alex Felton, Bharati Naik, Ben Brumfield and Boriana Milanova also contributed.", + "length": 85 + }, + { + "text": "-- G7 leaders said they would impose new sanctions on Russia over its role in the crisis.", + "length": 89 + }, + { + "text": "CNN's Gul Tuysuz reported from Kiev and Laura Smith-Spark wrote and reported from London.", + "length": 89 + }, + { + "text": "The others are from Denmark, Poland, Bulgaria and the Czech Republic, Russian state media said.", + "length": 95 
+ }, + { + "text": "The Russian Defense Ministry denied the accusation, according to the state news agency Itar-Tass.", + "length": 97 + }, + { + "text": "Ukraine launched the second stage of an \"anti-terrorist operation\" against militants in Slavyansk.", + "length": 98 + }, + { + "text": "Russian President Vladimir Putin has repeatedly criticized Kiev's use of force against Ukrainian civilians.", + "length": 107 + }, + { + "text": "CNN's Andrew Carey and Nick Paton Walsh in Slavyansk and journalist Victoria Butenko in Kiev contributed to this report.", + "length": 120 + }, + { + "text": "-- On Friday, a team of European and Ukrainian military observers were seized Friday by pro-Russian separatists in Slavyansk.", + "length": 125 + }, + { + "text": "The German Foreign Office said it had set up an emergency task force to find out what has happened to the team members, four of whom are German.", + "length": 144 + }, + { + "text": "Ukraine's Security Service, the SBU, said the group is being kept under \"inhumane conditions\" in the basement of a building held by the militants.", + "length": 146 + }, + { + "text": "Ukraine's government has promised constitutional reforms and protections for Russian speakers in a bid to ease the tensions in its eastern regions.", + "length": 147 + }, + { + "text": "The Ukrainian Prime Minister urged Russia to pull back its security forces and not to support pro-Russian militants in eastern and southern Ukraine.", + "length": 148 + }, + { + "text": "The statement from the group -- which includes Canada, France, Germany, Italy, Japan, the United Kingdom and the United States -- came hours after U.", + "length": 149 + }, + { + "text": "The source said the photos showed about 160 tanks, 230 infantry combat vehicles and armored personnel carriers, mine throwers and multiple-launch rocket systems.", + "length": 161 + }, + { + "text": "-- Russian military aircraft \"crossed and violated\" Ukrainian airspace seven times overnight, Ukrainian 
Prime Minister Arseniy Yatsenyuk told reporters in Rome on Saturday.", + "length": 172 + }, + { + "text": "-- Russia, which already had 40,000 troops on its side of the border, started new military drills a few days ago after Ukrainian forces said they killed five pro-Russian militants.", + "length": 180 + }, + { + "text": "Secretary of State John Kerry, Russian Foreign Minister Sergey Lavrov asked the United States to use its influence to secure the release of pro-Russian leaders being held in Ukraine.", + "length": 182 + }, + { + "text": "Kerry urged Russia to support efforts of the OSCE and the government of Ukraine to liberate the inspectors and their Ukrainian guides, according to a senior State Department official.", + "length": 183 + }, + { + "text": "Separatist leader Denis Pushilin, self-declared chairman of the so-called \"Donetsk People's Republic,\" told CNN he doesn't believe they are from the OSCE, but that some are NATO spies.", + "length": 184 + }, + { + "text": "The Defense Ministry source said the number of Ukraine troops put the pro-Russian militants at a disadvantage because the latter are \"armed only with small amount of pistols and shotguns.", + "length": 187 + }, + { + "text": "The self-declared mayor of Slavyansk, Vyacheslav Ponomarev, told reporters that one of the \"prisoners\" has diabetes, but he has the medicine he needs and will be given his own quarters overnight.", + "length": 195 + }, + { + "text": "\" Quoting a Russian Defense Ministry source, RIA Novosti said satellite photos showed the force forming around the city that has become a friction point between the Ukraine military and pro-Russian militants.", + "length": 208 + }, + { + "text": "Against the backdrop of increasing volatility in Ukraine, leaders of the G7 industrialized nations on Friday announced they would \"move swiftly to impose additional sanctions on Russia\" over its actions in Ukraine.", + "length": 214 + }, + { + "text": "The OSCE mission in Ukraine is tasked 
with helping to implement an international agreement signed nine days ago in Switzerland, which called for illegal militia groups to disarm and leave occupied buildings, among other provisions.", + "length": 231 + }, + { + "text": "Kiev, Ukraine (CNN) -- A perilous face-off intensified Saturday when Russia state news complained that Ukraine had mobilized 15,000 troops in the suburbs of Slavyansk in eastern Ukraine \"in order to wipe out the city and its residents.", + "length": 235 + }, + { + "text": "The inspectors from the Organization for Security and Co-operation in Europe were detained Friday as they entered Slavyansk, along with five Ukrainian military representatives and the driver of their bus, Ukraine's Interior Ministry said.", + "length": 238 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7656263709068298 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:44.685002846Z", + "first_section_created": "2025-12-23T09:35:44.685396562Z", + "last_section_published": "2025-12-23T09:35:44.685629872Z", + "all_results_received": "2025-12-23T09:35:44.744135887Z", + "output_generated": "2025-12-23T09:35:44.744350496Z", + "total_processing_time_ms": 59, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:44.685396562Z", + "publish_time": "2025-12-23T09:35:44.685629872Z", + "first_worker_start": "2025-12-23T09:35:44.686134192Z", + "last_worker_end": "2025-12-23T09:35:44.743273Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:44.686123892Z", + "start_time": "2025-12-23T09:35:44.686206895Z", + "end_time": "2025-12-23T09:35:44.686292699Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": 
"2025-12-23T09:35:44.686388Z", + "start_time": "2025-12-23T09:35:44.68651Z", + "end_time": "2025-12-23T09:35:44.743273Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:44.68607889Z", + "start_time": "2025-12-23T09:35:44.686164694Z", + "end_time": "2025-12-23T09:35:44.686279498Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:44.686058489Z", + "start_time": "2025-12-23T09:35:44.686134192Z", + "end_time": "2025-12-23T09:35:44.686168394Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4812, + "slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/006684f2e2fa98b707502c549ab1a0ea0d22a3db.json 
b/data/output/006684f2e2fa98b707502c549ab1a0ea0d22a3db.json new file mode 100644 index 0000000..73a3caf --- /dev/null +++ b/data/output/006684f2e2fa98b707502c549ab1a0ea0d22a3db.json @@ -0,0 +1,294 @@ +{ + "file_name": "006684f2e2fa98b707502c549ab1a0ea0d22a3db.txt", + "total_words": 742, + "top_n_words": [ + { + "word": "the", + "count": 42 + }, + { + "word": "a", + "count": 32 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "of", + "count": 19 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "s", + "count": 13 + }, + { + "word": "is", + "count": 12 + }, + { + "word": "with", + "count": 12 + }, + { + "word": "as", + "count": 10 + }, + { + "word": "it", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "A- .", + "length": 4 + }, + { + "text": "com .", + "length": 5 + }, + { + "text": "Oh, have you heard?", + "length": 19 + }, + { + "text": "Okay, like his brother.", + "length": 23 + }, + { + "text": "See the full article at EW.", + "length": 27 + }, + { + "text": "They get to work issues out.", + "length": 28 + }, + { + "text": "It bridges slapstick and action.", + "length": 32 + }, + { + "text": ") But that, as I say, is on the surface.", + "length": 40 + }, + { + "text": "Depp makes a delightful cameo appearance!", + "length": 41 + }, + { + "text": "It's quick-witted with its pop references.", + "length": 42 + }, + { + "text": "It's part homage and part wink at the past.", + "length": 43 + }, + { + "text": "Also, Tatum can sustain a great, I mean great, Dumb Face.", + "length": 57 + }, + { + "text": "'' There's room for laughs and truth at this newly reopened address.", + "length": 68 + }, + { + "text": "(''Ap-chemistry,'' he calls it, laboriously reading his course list.", + "length": 68 + }, + { + "text": ") Given a do-over, the two get to reexperience those less than wonder years.", + "length": 76 + }, + { + "text": "It jokes about high school but is also a sensitive sociological study of those crucial years.", + "length": 93 + }, 
+ { + "text": "(The curiosity isn't that he's the brother of James Franco; it's that he's so interestingly weird.", + "length": 98 + }, + { + "text": "And now for the revelation: The guy's got bust-out talents as a really funny, self-aware comic actor.", + "length": 101 + }, + { + "text": "Underneath, \"21 Jump Street\" is a riot of risks that pay off, the biggest of which might be handing Tatum funny business.", + "length": 121 + }, + { + "text": "Schmidt is assigned a class schedule befitting a popular non-Einstein; Jenko is shuffled into advanced-placement chemistry.", + "length": 123 + }, + { + "text": "And by the way they get to bust a drug ring fronted by a smart and popular guy played with oddball charisma by Dave Franco.", + "length": 123 + }, + { + "text": "On the surface, \"21 Jump Street\" follows the crime-fighting antics of odd-couple cop partners Schmidt (Hill) and Jenko (Channing Tatum).", + "length": 136 + }, + { + "text": "But 25 years later, \"21 Jump Street\" the TV show is remembered primarily as the career kickstarter of Johnny Depp as a young actor with an obvious something.", + "length": 157 + }, + { + "text": "The hit series ran for four years, and was notably progressive in its willingness to incorporate newsmaking social issues, including AIDS, homophobia, and child abuse.", + "length": 167 + }, + { + "text": "With all appropriate salutes to the busy fellow's famous abs, and with full forgiveness for his participation in \"The Vow,\" I am feeling the Channing charm for the first time.", + "length": 175 + }, + { + "text": "Explaining why he's assigning Schmidt and Jenko to shutting down the school drug ring after the death of one student, the captain tells it true: ''This kid is white, so people actually give a s---.", + "length": 197 + }, + { + "text": "Seven years later, when both police rookies are coincidentally assigned to an undercover--high schooler program, the duo are prepared to play out those same life scripts, until a mix-up 
alters fate.", + "length": 198 + }, + { + "text": "And wow, those scenes where the smart actor, playing a ''dumb'' character who realizes he's not as dumb as he has always believed he is, fakes playing a dumb guy to mess with his smart partner's head are kind of perfect.", + "length": 220 + }, + { + "text": "Their wonky dynamic is established in a perfectly placed opening flashback to 2005, when the two were real high school students -- Schmidt the klutzy, anxious nerd with a brain; Jenko the athletic, academically challenged coolio.", + "length": 229 + }, + { + "text": "The Late-1980s pop culture relic \"21 Jump Street\" was a primo specimen of a TV police procedural with a catchy hook: A team of fresh-faced cops work undercover as high school kids, reporting back to their tough/earnest boss at the address listed above.", + "length": 252 + }, + { + "text": "A refresher viewing of any old \"Jump Street\" episode may sharpen your appreciation for the kind of earnest '80s-TV police captain that Ice Cube is tweaking in his funky turn as Schmidt and Jenko's boss, but the joke is equally welcome without the historical background.", + "length": 269 + }, + { + "text": "As it turns out, dim memories and a new generation of pop culture consumers work to the great advantage of \"21 Jump Street\" the movie: What this fast, cheeky, and very funny interpretation of the original premise sacrifices in teachable moments, it makes up for in intelligent giddiness.", + "length": 287 + }, + { + "text": "Shaped by the precocious comedic smarts of talent-on-a-roll Jonah Hill (who not only costars but also developed the story with Michael Bacall and is one of the executive producers), the movie morphs into an action comedy with a tonal complexity that marks it as a very contemporary creative project.", + "length": 299 + }, + { + "text": "Under the limber direction of Phil Lord and Christopher Miller (\"Cloudy With a Chance of Meatballs\"), and working from a screenplay by Bacall -- a 
script jammed, by the way, with so many oinky references to male reproductive equipment that I choose to believe the producers were rising to a dare -- Hill and Tatum play their Mutt-and-Jeff act against a supporting cast equally fast on their feet.", + "length": 396 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5820273160934448 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:45.186448645Z", + "first_section_created": "2025-12-23T09:35:45.186849662Z", + "last_section_published": "2025-12-23T09:35:45.187026869Z", + "all_results_received": "2025-12-23T09:35:45.24978446Z", + "output_generated": "2025-12-23T09:35:45.249988068Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:45.186849662Z", + "publish_time": "2025-12-23T09:35:45.187026869Z", + "first_worker_start": "2025-12-23T09:35:45.187500589Z", + "last_worker_end": "2025-12-23T09:35:45.248416Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:45.187521589Z", + "start_time": "2025-12-23T09:35:45.187641894Z", + "end_time": "2025-12-23T09:35:45.187733598Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:45.18773Z", + "start_time": "2025-12-23T09:35:45.187872Z", + "end_time": "2025-12-23T09:35:45.248416Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:45.187477988Z", + "start_time": "2025-12-23T09:35:45.187571391Z", + "end_time": "2025-12-23T09:35:45.187682296Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:45.187431186Z", + 
"start_time": "2025-12-23T09:35:45.187500589Z", + "end_time": "2025-12-23T09:35:45.18753039Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4243, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/006693d9875ec220fb32444b16d0cbc1ddc96ed4.json b/data/output/006693d9875ec220fb32444b16d0cbc1ddc96ed4.json new file mode 100644 index 0000000..468f3f9 --- /dev/null +++ b/data/output/006693d9875ec220fb32444b16d0cbc1ddc96ed4.json @@ -0,0 +1,388 @@ +{ + "file_name": "006693d9875ec220fb32444b16d0cbc1ddc96ed4.txt", + "total_words": 1006, + "top_n_words": [ + { + "word": "the", + "count": 54 + }, + { + "word": "to", + "count": 26 + }, + { + "word": "a", + "count": 23 + }, + { + "word": "of", + "count": 22 + }, + { + "word": "love", + "count": 21 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "in", 
+ "count": 20 + }, + { + "word": "bracelet", + "count": 17 + }, + { + "word": "was", + "count": 17 + }, + { + "word": "on", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Not that the bracelets are always for eternity.", + "length": 47 + }, + { + "text": "Soon Cartier found it could not keep up with production.", + "length": 56 + }, + { + "text": "Cartier currently lists 16 lockable bracelets on its website.", + "length": 61 + }, + { + "text": "After all, love symbols should suggest an everlasting quality.", + "length": 62 + }, + { + "text": "Each couple locked each other in before exchanging screwdrivers.", + "length": 64 + }, + { + "text": "According to a source, her replacement was a gift from the company.", + "length": 67 + }, + { + "text": "The auction famously made more than three times the estimated value.", + "length": 68 + }, + { + "text": "He started working there in 1969, producing his iconic Love bracelet the same year.", + "length": 83 + }, + { + "text": "Others who are having second thoughts about being shackled to another needn’t worry.", + "length": 86 + }, + { + "text": "At one point the bracelet was so well known that it became a measure of one’s commitment.", + "length": 91 + }, + { + "text": "The bracelets have appeared on the lists of top auction houses include Christie’s and Bonhams.", + "length": 96 + }, + { + "text": "Cipullo had been fascinated by history and often produced work around legends and old practices.", + "length": 96 + }, + { + "text": "It consists of two C-shaped halves which are then secured using two working screws on either side.", + "length": 98 + }, + { + "text": "The upmarket jeweller, a favourite of the Duchess of Cambridge, launched its Love bangles in 1969.", + "length": 98 + }, + { + "text": "The bracelet can be removed using a special mini screwdriver which handily comes with every purchase.", + "length": 101 + }, + { + "text": "They could 
only make a purchase for somebody else – however, that is a rule that has since been abolished.", + "length": 108 + }, + { + "text": "Asking one’s beau if he loved you enough to buy the Cartier Love bracelet was on a par with an engagement.", + "length": 108 + }, + { + "text": "What modern people want are love symbols that look semi-permanent – or, at least, require a trick to remove.", + "length": 110 + }, + { + "text": "The bracelet was designed to be given to one’s true love and then screwed on to their wrist for all eternity.", + "length": 111 + }, + { + "text": "Lindsay Lohan, left, and Katie Holmes, right, are all fans of the bracelet, which ranges from £4,000 to £40,000 .", + "length": 115 + }, + { + "text": "The bracelet, which this year celebrates its 45th birthday, was created by Cartier’s Italian designer Aldo Cipullo.", + "length": 117 + }, + { + "text": "Pippa Middleton, left, was spotted getting her bracelet fixed last week and Chloe Green, right, has no fewer than six .", + "length": 119 + }, + { + "text": "of Windsor and Wallis Simpson were among the items in the 1998 Sotheby’s auction of the Royals’ private collections.", + "length": 120 + }, + { + "text": "’ When the bracelets were launched, Cartier enforced a rule that customers were not allowed to buy one for themselves.", + "length": 120 + }, + { + "text": "The designer is thought to have adopted the idea of being locked in a committed relationship in his Love bracelet design.", + "length": 121 + }, + { + "text": "The Precious Love bracelet with diamonds was launched in 1979 followed by the Love ring, Love cufflinks and Love earrings.", + "length": 122 + }, + { + "text": "Chloe Green, the daughter of Topshop tycoon Sir Philip Green, has no fewer than six, often wearing several at the same time.", + "length": 124 + }, + { + "text": "In an interview at the time, Cipullo said: ‘Love has become too commercial, yet life without love is nothing – a fat zero.", + "length": 126 + }, + { + "text": "It 
is understood Pippa was so upset by the loss of her bracelet, she went straight to the brand’s Bond Street store to buy another one.", + "length": 137 + }, + { + "text": "Yet this has done nothing to stop the rise of the bangle that has become the latest must-have trinket for a new generation of celebrities.", + "length": 138 + }, + { + "text": "Mr Khyami had claimed a tape showing him in bed with another woman had been made before he met the daughter of Formula 1 billionaire, Bernie.", + "length": 141 + }, + { + "text": "Its classic design comes in yellow, pink or white gold, while more intricate designs feature yellow and pink sapphires, garnets and amethysts.", + "length": 142 + }, + { + "text": "The Cartier Love bracelet was launched in 1969 and was designed to be given to one's true love and screwed on to their wrist for all eternity .", + "length": 143 + }, + { + "text": "Tamara Ecclestone returned two diamond-encrusted Love bracelets she had bought for her then boyfriend Omar Khyami when he cheated on her in 2012.", + "length": 145 + }, + { + "text": "Tom Cruise bought an 18-carat gold Love bracelet as a ‘push present’ for his then wife Katie Holmes on the birth of their daughter Suri in 2006.", + "length": 148 + }, + { + "text": "But sources close to Miss Ecclestone believed she had ‘absolute proof’ this was not the case after spotting Mr Khyami wearing the two bracelets in the film.", + "length": 160 + }, + { + "text": "It is understood that 31-year-old Pippa, who is in a relationship with financier Nico Jackson, had lost or misplaced her original bangle when the screws came loose.", + "length": 164 + }, + { + "text": "Cartier presented the bangles to 25 well-known couples including, rather ironically, Elizabeth Taylor and Richard Burton, at a ceremony in its boutique in New York.", + "length": 164 + }, + { + "text": "He was particularly interested in the story of men locking up their wives with iron chastity belts to ensure their fidelity while they were 
away on the battlefield.", + "length": 164 + }, + { + "text": "Cilla Black was lucky too when her Love bracelet – given to her by her late husband Bobby – was one of the pieces burglars left behind when they raided her home.", + "length": 165 + }, + { + "text": "Once nicknamed the ‘slave bracelet’, its design is rumoured to have been based on the medieval chastity belt into which men would lock their wives when they went to war.", + "length": 173 + }, + { + "text": "Meanwhile, actress Elizabeth Hurley, who was then the face of Estee Lauder, was presented with one of the bracelets by the firm’s boss Leonard Lauder as a token of his affection.", + "length": 180 + }, + { + "text": "To ensure that they were authentic to the last, Cartier last year lent the BBC a pink-gold Love bracelet, set with four diamonds, for its 2013 film Burton and Taylor starring Dominic West and Helena Bonham Carter.", + "length": 213 + }, + { + "text": "Love bracelets were spotted on the wrists of Nancy and Frank Sinatra and later Joan Collins, Princess Diana, Jane Seymour, Elton John and more recently Jennifer Aniston, Angelina Jolie and model Rosie Huntington-Whiteley.", + "length": 221 + }, + { + "text": "The Cartier Love bracelet has graced the wrists of Hollywood actresses Lindsay Lohan and Katie Holmes, while last week Pippa Middleton was spotted in Cartier’s Bond Street store getting her 18-carat gold bangle replaced.", + "length": 222 + }, + { + "text": "A replacement screw costs as much as £245, while the bracelets themselves range from £4,450 to an eye-watering £39,900 for bespoke designs inlaid with diamonds – making them a powerful status symbol on a par with a Rolex watch.", + "length": 232 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.46924611926078796 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:45.687941446Z", + "first_section_created": "2025-12-23T09:35:45.689200698Z", + "last_section_published": 
"2025-12-23T09:35:45.689540912Z", + "all_results_received": "2025-12-23T09:35:45.797928887Z", + "output_generated": "2025-12-23T09:35:45.798597514Z", + "total_processing_time_ms": 110, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 108, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:45.689200698Z", + "publish_time": "2025-12-23T09:35:45.689444008Z", + "first_worker_start": "2025-12-23T09:35:45.689835225Z", + "last_worker_end": "2025-12-23T09:35:45.78219Z", + "total_journey_time_ms": 92, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:45.689924028Z", + "start_time": "2025-12-23T09:35:45.689981331Z", + "end_time": "2025-12-23T09:35:45.690094335Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:45.690132Z", + "start_time": "2025-12-23T09:35:45.690286Z", + "end_time": "2025-12-23T09:35:45.78219Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 91 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:45.689903427Z", + "start_time": "2025-12-23T09:35:45.68996193Z", + "end_time": "2025-12-23T09:35:45.690075234Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:45.689765522Z", + "start_time": "2025-12-23T09:35:45.689835225Z", + "end_time": "2025-12-23T09:35:45.689877726Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:45.68948551Z", + "publish_time": "2025-12-23T09:35:45.689540912Z", + "first_worker_start": "2025-12-23T09:35:45.689999731Z", + "last_worker_end": "2025-12-23T09:35:45.797054Z", + "total_journey_time_ms": 107, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": 
"2025-12-23T09:35:45.689944329Z", + "start_time": "2025-12-23T09:35:45.690056434Z", + "end_time": "2025-12-23T09:35:45.690080135Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:45.690197Z", + "start_time": "2025-12-23T09:35:45.690321Z", + "end_time": "2025-12-23T09:35:45.797054Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 106 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:45.689930028Z", + "start_time": "2025-12-23T09:35:45.689999731Z", + "end_time": "2025-12-23T09:35:45.690040633Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:45.68997203Z", + "start_time": "2025-12-23T09:35:45.690018532Z", + "end_time": "2025-12-23T09:35:45.690044133Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 197, + "min_processing_ms": 91, + "max_processing_ms": 106, + "avg_processing_ms": 98, + "median_processing_ms": 106, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2936, + "slowest_section_id": 1, + "slowest_section_time_ms": 107 + } +} diff --git a/data/output/0066b77d259f645ce9226011d4aa13121a8ac57b.json b/data/output/0066b77d259f645ce9226011d4aa13121a8ac57b.json new file mode 100644 index 0000000..be93f96 --- /dev/null +++ b/data/output/0066b77d259f645ce9226011d4aa13121a8ac57b.json @@ -0,0 +1,334 @@ +{ + "file_name": "0066b77d259f645ce9226011d4aa13121a8ac57b.txt", + "total_words": 505, + "top_n_words": [ + { + "word": "the", + "count": 17 + }, + { + "word": "she", + "count": 16 + }, + { + "word": "i", + "count": 15 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "it", + "count": 12 + }, + { + "word": "s", + "count": 12 + }, + { + "word": "beck", + "count": 10 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "her", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "...", + "length": 3 + }, + { + "text": "So it's always there.", + "length": 21 + }, + { + "text": "I want some happiness.", + "length": 22 + }, + { + "text": "360° weeknights 8pm ET.", + "length": 24 + }, + { + "text": "\" Watch Anderson Cooper .", + "length": 25 + }, + { + "text": "\"I gave true brotherhood.", + "length": 25 + }, + { + "text": "But please don't kill me.", + "length": 25 + }, + { + "text": "Maybe I could cure myself.", + "length": 26 + }, + { + "text": "\" Trapped in a man's body .", + "length": 27 + }, + { + "text": "\"There's a lot of prejudice out there.", + "length": 38 + }, + { + "text": "For the latest from AC360° click here.", + "length": 39 + }, + { + "text": "\" Beck recently came out as transgender.", + "length": 40 + }, + { + "text": "\"No one ever met the real me,\" she said.", + "length": 40 + }, + { + "text": "You don't have to like me, I don't care.", + "length": 40 + }, + { + "text": "Navy SEAL's Journey to 
Coming out Transgender.", + "length": 46 + }, + { + "text": "\"But I don't want you to beat me up and kill me.", + "length": 48 + }, + { + "text": "\"I want to have my life,\" she told CNN's \"AC360.", + "length": 48 + }, + { + "text": "Doing so earlier would have been too big a risk.", + "length": 48 + }, + { + "text": "Transgender men and women are banned from service.", + "length": 50 + }, + { + "text": "And for her entire career, Beck kept her mouth shut.", + "length": 52 + }, + { + "text": "Navy SEAL Kristin Beck says she knows what she wants.", + "length": 53 + }, + { + "text": "She earned a Bronze Star and a Purple Heart along the way.", + "length": 58 + }, + { + "text": "\" Beck says she doesn't need people to love, or even like, her.", + "length": 63 + }, + { + "text": "She wrote about the experience in a book, \"Warrior Princess: A U.", + "length": 65 + }, + { + "text": "\" But the feeling of being born in the wrong body never went away.", + "length": 66 + }, + { + "text": "\"That's a chance that if I took it, I might be dead today,\" she said.", + "length": 69 + }, + { + "text": "Deep down, under various layers, or skins, she hid her female persona.", + "length": 70 + }, + { + "text": "Though her identity was hidden, the rest of what Beck offered was true.", + "length": 71 + }, + { + "text": "It chronicles her life as a young boy and man, known then as Chris Beck.", + "length": 72 + }, + { + "text": "\" \"I fought for 20 years for life, liberty and the pursuit of happiness.", + "length": 72 + }, + { + "text": "Beck deployed 13 times, serving in places such as Bosnia, Afghanistan and Iraq.", + "length": 79 + }, + { + "text": "\" She thought: \"I could totally make it go away if I could be at that top level.", + "length": 80 + }, + { + "text": "(CNN) -- After years spent fighting in some of the world's worst wars, former U.", + "length": 80 + }, + { + "text": "There's been a lot of transgender people who are killed for prejudice, for hatred.", + "length": 
82 + }, + { + "text": "\" 'No one ever met the real me' Beck explains her years of hiding as living like an onion.", + "length": 90 + }, + { + "text": "\"You would never notice it because I can push it so deep, but then it does kinda, like, it gnaws at you.", + "length": 104 + }, + { + "text": "\"It is a constant, but as you suppress and as you bottle it up, it's not like on that surface,\" she said.", + "length": 105 + }, + { + "text": "\" Looking back, Beck believes she might have wanted to become a SEAL because they are \"the toughest of the tough.", + "length": 113 + }, + { + "text": "She says virtually no one, out of the thousands of people she worked with, knew her secret -- it was so well hidden.", + "length": 116 + }, + { + "text": "When the book came out -- some amazing support and some amazing praises -- but also some pretty amazing bigotry and hatred.", + "length": 123 + }, + { + "text": "Though she's felt trapped in the wrong body since grade school, Beck didn't come out until after she left the military in 2011.", + "length": 127 + }, + { + "text": "I did my best, 150% all the time, and I gave strength and honor and my full brotherhood to every military person I ever worked with.", + "length": 132 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4751462936401367 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:46.190437289Z", + "first_section_created": "2025-12-23T09:35:46.190781603Z", + "last_section_published": "2025-12-23T09:35:46.190974111Z", + "all_results_received": "2025-12-23T09:35:46.265261078Z", + "output_generated": "2025-12-23T09:35:46.265459386Z", + "total_processing_time_ms": 75, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 74, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:46.190781603Z", + "publish_time": "2025-12-23T09:35:46.190974111Z", + "first_worker_start": 
"2025-12-23T09:35:46.191606537Z", + "last_worker_end": "2025-12-23T09:35:46.26435Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:46.191561335Z", + "start_time": "2025-12-23T09:35:46.191615638Z", + "end_time": "2025-12-23T09:35:46.19166684Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:46.191855Z", + "start_time": "2025-12-23T09:35:46.191986Z", + "end_time": "2025-12-23T09:35:46.26435Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:46.191639539Z", + "start_time": "2025-12-23T09:35:46.191702141Z", + "end_time": "2025-12-23T09:35:46.191769744Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:46.191544435Z", + "start_time": "2025-12-23T09:35:46.191606537Z", + "end_time": "2025-12-23T09:35:46.191626538Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 72, + "min_processing_ms": 72, + "max_processing_ms": 72, + "avg_processing_ms": 72, + "median_processing_ms": 72, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": 
"wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2613, + "slowest_section_id": 0, + "slowest_section_time_ms": 73 + } +} diff --git a/data/output/0066bf9a773be95eec8fd4933a2fc66ace346680.json b/data/output/0066bf9a773be95eec8fd4933a2fc66ace346680.json new file mode 100644 index 0000000..4de2879 --- /dev/null +++ b/data/output/0066bf9a773be95eec8fd4933a2fc66ace346680.json @@ -0,0 +1,514 @@ +{ + "file_name": "0066bf9a773be95eec8fd4933a2fc66ace346680.txt", + "total_words": 850, + "top_n_words": [ + { + "word": "the", + "count": 33 + }, + { + "word": "a", + "count": 31 + }, + { + "word": "she", + "count": 30 + }, + { + "word": "in", + "count": 26 + }, + { + "word": "her", + "count": 21 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "was", + "count": 17 + }, + { + "word": "for", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "'She .", + "length": 6 + }, + { + "text": "Judge .", + "length": 7 + }, + { + "text": "surgery.", + "length": 8 + }, + { + "text": "'There .", + "length": 8 + }, + { + "text": "housing.", + "length": 8 + }, + { + "text": "Fraud Act.", + "length": 10 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "This was .", + "length": 10 + }, + { + "text": "scandalous.", + "length": 11 + }, + { + "text": "punishment.", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "the system'.", + "length": 12 + }, + { + "text": "was a friend.", + "length": 13 + }, + { + "text": "Anthony Bond .", + "length": 14 + }, + { + "text": "Wilson-Ellis .", + "length": 14 + }, + 
{ + "text": "Prosecuting, .", + "length": 14 + }, + { + "text": "' Prosecutors .", + "length": 15 + }, + { + "text": "Investigators .", + "length": 15 + }, + { + "text": "150 miles away.", + "length": 15 + }, + { + "text": "Tenancy fraud is .", + "length": 18 + }, + { + "text": "was paying me back.", + "length": 19 + }, + { + "text": "'She has lost her job.", + "length": 22 + }, + { + "text": "who was more deserving.", + "length": 23 + }, + { + "text": "for seven months today.", + "length": 23 + }, + { + "text": "theft, pure and simple.", + "length": 23 + }, + { + "text": "15:23 EST, 20 August 2013 .", + "length": 27 + }, + { + "text": "13:26 EST, 20 August 2013 .", + "length": 27 + }, + { + "text": "A barrister who obtained a .", + "length": 28 + }, + { + "text": "'She is extremely remorseful.", + "length": 29 + }, + { + "text": "Wilson-Ellis then applied for a .", + "length": 33 + }, + { + "text": "had false information about her job.", + "length": 36 + }, + { + "text": "She has been jailed for seven months .", + "length": 38 + }, + { + "text": "single mother and was given a council .", + "length": 40 + }, + { + "text": "information about her address and income.", + "length": 41 + }, + { + "text": "'As to the harm [you caused] that too was .", + "length": 43 + }, + { + "text": "He said: 'The effect was to provide you with .", + "length": 46 + }, + { + "text": "Wilson-Ellis owns this property in Nottingham .", + "length": 47 + }, + { + "text": "denied the accommodation that was offered to you.", + "length": 49 + }, + { + "text": "' If Wilson-Ellis had not been caught she would .", + "length": 49 + }, + { + "text": "The need for social housing has never been greater.", + "length": 51 + }, + { + "text": "- around £35,000 in investigation and court costs.", + "length": 51 + }, + { + "text": "officers will come across a worse example of fraud.", + "length": 51 + }, + { + "text": "I had leant her some money previously, to help her .", + "length": 52 + }, + { + "text": 
"He said: 'It is hard to think that our investigation .", + "length": 54 + }, + { + "text": "Bristol’s Assistant Mayor Gus Hoyt, responsible for .", + "length": 55 + }, + { + "text": "Speaking during her trial, Wilson-Ellis told Bristol Crown .", + "length": 60 + }, + { + "text": "She claimed: 'The lady who stayed with me was not living there.", + "length": 63 + }, + { + "text": "flat in Bristol which she then illegally sub-let for £100-a-week.", + "length": 66 + }, + { + "text": "out so she could help her mum out back in Jamaica for her mum’s .", + "length": 67 + }, + { + "text": "are currently more than 14,000 people on the waiting list in Bristol.", + "length": 69 + }, + { + "text": "led down to the cells, while members of her family blew kisses to her.", + "length": 70 + }, + { + "text": "was sentenced to seven months in prison and wailed loudly as she was .", + "length": 70 + }, + { + "text": "bigger council house in Bristol in September 2011, again giving false .", + "length": 71 + }, + { + "text": "Court she had not rented out her council flat and insisted the tenant .", + "length": 71 + }, + { + "text": "Alan Fuller had asked the judge to order Wilson-Ellis to pay back some .", + "length": 72 + }, + { + "text": "or all of the council’s costs, but the judge declined to impose that .", + "length": 72 + }, + { + "text": "Nadine Wilson-Ellis, 35, told officials she was a penniless unemployed .", + "length": 72 + }, + { + "text": "found she had falsified bank statements and a maternity certificate in .", + "length": 72 + }, + { + "text": "forged bank statements showing an address in Bristol, and a form which .", + "length": 72 + }, + { + "text": "neighbourhoods and council housing, accused her of a 'shocking abuse of .", + "length": 73 + }, + { + "text": "court heard she applied for a council house in September 2008, but used .", + "length": 73 + }, + { + "text": "Bristol, depriving a family in genuine need of much sought-after social .", + "length": 73 + }, + { 
+ "text": "accommodation that you did not need and that was in the place of someone .", + "length": 74 + }, + { + "text": "an attempt to prove she was living in Bristol while she lived and worked .", + "length": 74 + }, + { + "text": "on behalf of the council said the scam cost the taxpayer almost £75,000 .", + "length": 74 + }, + { + "text": "high because your behaviour resulted in a more deserving candidate being .", + "length": 74 + }, + { + "text": "eventually have qualified for the right to buy a larger council house in .", + "length": 74 + }, + { + "text": "Homeowner: This shows another of Wilson Ellis's properties in Nottingham .", + "length": 74 + }, + { + "text": "Wilson-Ellis - who recently qualified as a barrister - made £10,000 from .", + "length": 75 + }, + { + "text": "council flat and sub-let it while already owning two homes has been jailed .", + "length": 76 + }, + { + "text": "She was only caught out when she asked Bristol City Council for a bigger flat.", + "length": 78 + }, + { + "text": "She has had to put in a claim for income support because she has no other means.", + "length": 80 + }, + { + "text": "Had she pleaded guilty at the first hearing, court costs would have been just £3,700.", + "length": 86 + }, + { + "text": "'It robs those who are in genuine need of social housing of a precious and scarce resource.", + "length": 91 + }, + { + "text": "Miss Begum said: 'When she came to Bristol she did just want a haven away from her ex-partner.", + "length": 94 + }, + { + "text": "She was teaching law full-time at a higher education college in Nottingham with a £30,000 salary.", + "length": 98 + }, + { + "text": "'You brazenly lied in the face of the most damning evidence which fortunately the jury saw through.", + "length": 99 + }, + { + "text": "' Defending Wilson-Ellis, Shahida Begum said her client did not financially benefit from the scheme.", + "length": 100 + }, + { + "text": "Locked up: Barrister Nadine Wilson Ellis obtained a council 
flat and sub-let it while owning two homes.", + "length": 103 + }, + { + "text": "the scam which she used to pay the mortgages on her other homes which were worth £55,000 and £73,000.", + "length": 103 + }, + { + "text": "She has since lost her job as a result of the court proceedings, and is now on benefits and going through a divorce.", + "length": 116 + }, + { + "text": "The council say because she had the flat they had to pay for a genuine claimant to live elsewhere at a cost of £39,000.", + "length": 120 + }, + { + "text": "Michael Longman jailed her for seven months at Bristol Crown Court today after she previously pleading guilty to two offences under the .", + "length": 137 + }, + { + "text": "The mother-of-two - who earned £30,000-a-year as a legal lecturer - lived 150 miles away with her husband in Nottingham in one of her two homes.", + "length": 145 + }, + { + "text": "Jailed: Wilson-Ellis - who recently qualified as a barrister - made £10,000 from the scam which she used to pay the mortgages on her other homes.", + "length": 146 + }, + { + "text": "Scam: The mother-of-two was given this council flat in Bristol which she then illegally sub-let for £100-a-week while she lived with her family in one of two homes in Nottingham .", + "length": 180 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.48014575242996216 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:46.691737082Z", + "first_section_created": "2025-12-23T09:35:46.693704764Z", + "last_section_published": "2025-12-23T09:35:46.693928173Z", + "all_results_received": "2025-12-23T09:35:46.768313443Z", + "output_generated": "2025-12-23T09:35:46.768510652Z", + "total_processing_time_ms": 76, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 74, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:46.693704764Z", + "publish_time": "2025-12-23T09:35:46.693928173Z", + 
"first_worker_start": "2025-12-23T09:35:46.694419193Z", + "last_worker_end": "2025-12-23T09:35:46.767455Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:46.694409893Z", + "start_time": "2025-12-23T09:35:46.694500997Z", + "end_time": "2025-12-23T09:35:46.694618901Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:46.694666Z", + "start_time": "2025-12-23T09:35:46.694803Z", + "end_time": "2025-12-23T09:35:46.767455Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:46.694421993Z", + "start_time": "2025-12-23T09:35:46.694501897Z", + "end_time": "2025-12-23T09:35:46.694923414Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:46.69434939Z", + "start_time": "2025-12-23T09:35:46.694419193Z", + "end_time": "2025-12-23T09:35:46.694466695Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 72, + "min_processing_ms": 72, + "max_processing_ms": 72, + "avg_processing_ms": 72, + "median_processing_ms": 72, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + 
"wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4843, + "slowest_section_id": 0, + "slowest_section_time_ms": 73 + } +} diff --git a/data/output/0066d0f0e01f4a979cd039996f210044b8593b93.json b/data/output/0066d0f0e01f4a979cd039996f210044b8593b93.json new file mode 100644 index 0000000..c58e23e --- /dev/null +++ b/data/output/0066d0f0e01f4a979cd039996f210044b8593b93.json @@ -0,0 +1,294 @@ +{ + "file_name": "0066d0f0e01f4a979cd039996f210044b8593b93.txt", + "total_words": 554, + "top_n_words": [ + { + "word": "the", + "count": 40 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "of", + "count": 13 + }, + { + "word": "to", + "count": 9 + }, + { + "word": "was", + "count": 9 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "torre", + "count": 8 + }, + { + "word": "baby", + "count": 7 + }, + { + "word": "s", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "m.", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Helen Pow .", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "I hugged her.", + "length": 13 + }, + { + "text": "Louis Cardinals.", + "length": 16 + }, + { + "text": "'The baby was shaken.", + "length": 21 + }, + { + "text": "19:56 EST, 19 June 2013 .", + "length": 25 + }, + { + "text": "23:07 EST, 19 June 2013 .", + "length": 25 + }, + { + "text": "I am just so proud of her.", + "length": 26 + }, + { + "text": "It was the catch of a lifetime.", + "length": 31 + }, + { + "text": "Everyone was going up to the woman and hugging her.", + "length": 51 + }, + { + "text": "'It took a long time 
for them to get to the parents.", + "length": 52 + }, + { + "text": "The baby was taken to Lutheran Hospital though just for observation.", + "length": 68 + }, + { + "text": "'The woman caught the baby,' onlooker Kristen Bramsen, 48, told The New York Daily News.", + "length": 88 + }, + { + "text": "I looked over and the father was on the ground clutching his heart,' Bramsen told The Post.", + "length": 91 + }, + { + "text": "A neighbor described the parents as 'gypsies' who regularly read tarot cards in the street.", + "length": 91 + }, + { + "text": "' He added that while his daughter didn't play a lot of sports growing up 'she always had good hands.", + "length": 101 + }, + { + "text": "Hero: Cristina Torre, pictured in 2005, caught the baby in her arms when he fell from a shop awning .", + "length": 101 + }, + { + "text": "As she was calling 911, the baby fell, but he landed safety in Torre's outstretched arms, witnesses said.", + "length": 105 + }, + { + "text": "' She added in an interview with The New York Post: 'I really think the baby would have been dead if it wasn't for the woman.", + "length": 125 + }, + { + "text": "Cristina Torre, 44, spotted the 1-year-old boy hanging from the front of a store on 3rd Avenue and 92nd street at around 10 a.", + "length": 126 + }, + { + "text": "The daughter of former Yankee legend Joe Torre caught a baby who was dangling from the awning of a building in Brooklyn on Wednesday.", + "length": 133 + }, + { + "text": "Lucky escape: The little boy slipped through the window of his family's second floor apartment and landed on the awning of the store, pictured .", + "length": 144 + }, + { + "text": "They face charges of endangering the welfare of a child and neglect and their three other children, aged 2, 3 and 5, have been placed in state care.", + "length": 148 + }, + { + "text": "' Joe Torre had a more than 17-year career as a catcher, first baseman and third baseman for teams including the Atlanta Braves, the New York 
Mets and the St.", + "length": 158 + }, + { + "text": "At Yankee Stadium on Wednesday, Joe Torre told The Post his daughter's magic catch was as good as anything he'd ever accomplished while managing the Yankees between 1996 and 2007.", + "length": 179 + }, + { + "text": "'Your children, that's your proudest moments and when they are responsible and caring and all that stuff it is a proud moment, especially in today's world where we seem to glorify bad things.", + "length": 191 + }, + { + "text": "'No question this ranks right up there with all that's happened to me,' said the 72-year-old who brought the team to four World Series titles and is now MLB's executive vice president of baseball operations.", + "length": 207 + }, + { + "text": "According to The Post, the parents were fast asleep when the child fell, though both - Sam Miller, 23, and Tiffany Demetria, 24 - were handcuffed and taken into custody by police after the frightening incident.", + "length": 210 + }, + { + "text": "' Police later determined the little boy, whose name hasn't been revealed, slipped through an open window after pushing out a piece of cardboard that had been placed next to an air conditioner at his second floor apartment.", + "length": 223 + }, + { + "text": "Proud: Former New York Yankees manager Joe Torre, pictured left, said he was very proud of his daughter who 'always had good hands' Quick-thinking: Torre, pictured left and right, heroically caught the unnamed child while calling 911 .", + "length": 235 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5059016346931458 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:47.194353211Z", + "first_section_created": "2025-12-23T09:35:47.194680024Z", + "last_section_published": "2025-12-23T09:35:47.19484653Z", + "all_results_received": "2025-12-23T09:35:47.265031941Z", + "output_generated": "2025-12-23T09:35:47.265199748Z", + "total_processing_time_ms": 70, + 
"file_splitting_time_ms": 0, + "workers_processing_time_ms": 70, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:47.194680024Z", + "publish_time": "2025-12-23T09:35:47.19484653Z", + "first_worker_start": "2025-12-23T09:35:47.19535455Z", + "last_worker_end": "2025-12-23T09:35:47.264242Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:47.195380351Z", + "start_time": "2025-12-23T09:35:47.195428652Z", + "end_time": "2025-12-23T09:35:47.195490855Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:47.195565Z", + "start_time": "2025-12-23T09:35:47.195682Z", + "end_time": "2025-12-23T09:35:47.264242Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 68 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:47.195337649Z", + "start_time": "2025-12-23T09:35:47.195395251Z", + "end_time": "2025-12-23T09:35:47.195480654Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:47.195290547Z", + "start_time": "2025-12-23T09:35:47.19535455Z", + "end_time": "2025-12-23T09:35:47.195382351Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 68, + "min_processing_ms": 68, + "max_processing_ms": 68, + "avg_processing_ms": 68, + "median_processing_ms": 68, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { 
+ "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3054, + "slowest_section_id": 0, + "slowest_section_time_ms": 69 + } +} diff --git a/data/output/0066fae0a34c538a83b1893789cb67f57b22aa5f.json b/data/output/0066fae0a34c538a83b1893789cb67f57b22aa5f.json new file mode 100644 index 0000000..662420e --- /dev/null +++ b/data/output/0066fae0a34c538a83b1893789cb67f57b22aa5f.json @@ -0,0 +1,492 @@ +{ + "file_name": "0066fae0a34c538a83b1893789cb67f57b22aa5f.txt", + "total_words": 1015, + "top_n_words": [ + { + "word": "the", + "count": 60 + }, + { + "word": "to", + "count": 29 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "of", + "count": 24 + }, + { + "word": "in", + "count": 21 + }, + { + "word": "s", + "count": 19 + }, + { + "word": "a", + "count": 16 + }, + { + "word": "iran", + "count": 15 + }, + { + "word": "iranian", + "count": 15 + }, + { + "word": "eu", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "’ Many .", + "length": 10 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "negotiations.", + "length": 13 + }, + { + "text": "The war games .", + "length": 15 + }, + { + "text": "for peaceful purposes.", + "length": 22 + }, + { + "text": "Iranian Tanker Company.", + "length": 23 + }, + { + "text": "06:44 EST, 18 October 2012 .", + "length": 28 + }, + { + 
"text": "05:29 EST, 18 October 2012 .", + "length": 28 + }, + { + "text": "Sara Malm and Emma Reynolds .", + "length": 29 + }, + { + "text": "cranking up financial pressure.", + "length": 31 + }, + { + "text": "European Union governments imposed .", + "length": 36 + }, + { + "text": "’ Hague said such pressure would .", + "length": 36 + }, + { + "text": "He spoke on the sidelines of urban .", + "length": 36 + }, + { + "text": "Iran is still refusing to cooperate .", + "length": 37 + }, + { + "text": "as the trigger for a regional conflict.", + "length": 39 + }, + { + "text": "The exercises were dubbed 'Ila Beit ol .", + "length": 40 + }, + { + "text": "Meanwhile, a leading European satellite .", + "length": 41 + }, + { + "text": "violation of its international obligations.", + "length": 43 + }, + { + "text": "and threats to sue from Iranian state television.", + "length": 49 + }, + { + "text": "Basiji, who are controlled by the Revolutionary Guard.", + "length": 54 + }, + { + "text": "Moqaddas,' or Toward the Holy City, meaning Jerusalem.", + "length": 54 + }, + { + "text": "asset freezes and trade restrictions on more companies.", + "length": 55 + }, + { + "text": "proxies, such as Lebanon's Hezbollah, on Israel's borders.", + "length": 58 + }, + { + "text": "The implication is that such a clash could draw in Iranian .", + "length": 60 + }, + { + "text": "Iranian Oil Company, a large crude exporter, and the National .", + "length": 63 + }, + { + "text": "sanctions on Tuesday against major state companies in the oil .", + "length": 63 + }, + { + "text": "Iranian officials say the nuclear programme is intended solely .", + "length": 64 + }, + { + "text": "combat drills in Tehran by 15,000 paramilitary fighters known as .", + "length": 66 + }, + { + "text": "countries fear that Iran is working to develop nuclear weapons but .", + "length": 68 + }, + { + "text": "and gas industry and strengthened restrictions on the central bank, .", + "length": 69 + }, + { + "text": 
"include drills on defending against mock air raids and other threats.", + "length": 69 + }, + { + "text": "result of earlier sanctions which prompted accusations of censorship .", + "length": 70 + }, + { + "text": "More than 30 firms and institutions were listed in the EU's Official .", + "length": 70 + }, + { + "text": "trade, energy and transport sectors' against Iran as well as imposing .", + "length": 71 + }, + { + "text": "Journal as targets for asset freezes in the EU, including the National .", + "length": 72 + }, + { + "text": "continue to mount ‘over the coming months unless negotiations succeed.", + "length": 72 + }, + { + "text": "’ Press TV says it was among the channels cut by the Eutelsat decision.", + "length": 73 + }, + { + "text": "fully with the International Atomic Energy Agency regarding its nuclear .", + "length": 73 + }, + { + "text": "provider took 19 Iranian television and radio broadcasters off the air, a .", + "length": 75 + }, + { + "text": "programme, something the Luxembourg meeting said was ‘acting in flagrant .", + "length": 76 + }, + { + "text": "Monday's meeting approved 'additional restrictive measures in the financial, .", + "length": 78 + }, + { + "text": "seek to reduce Tehran's access to cash by forcing Western companies to halt trade with Iran.", + "length": 92 + }, + { + "text": "Possible base: An alleged facility in Natanaz, Iran which the regime also claims is completely harmless .", + "length": 105 + }, + { + "text": "General Hossein Salami said Israel will 'definitely' face fierce retaliation if it attacks Iranian nuclear sites.", + "length": 113 + }, + { + "text": "His words, reported by the semiofficial ISNA news agency, seem to be part of Iran's efforts to portray any strike .", + "length": 115 + }, + { + "text": "Tough times: General Hossein Salami spoke out in the face of punitive EU sanctions against Iran over its nuclear programme .", + "length": 124 + }, + { + "text": "Others include Farsi-language channels for 
Iranian expatriates and Arabic-language offerings, including the news channel Al-Alam.", + "length": 129 + }, + { + "text": "’ German Foreign Minister Guido Westerwelle said the ministers had also banned the import of Iranian natural gas into EU nations.", + "length": 131 + }, + { + "text": "The European broadcast satellite cutoff means state broadcaster Irib’s television channels are no longer shown in Europe and elsewhere.", + "length": 137 + }, + { + "text": "Willing to retaliate: Iran will respond to offensive action from Israel with force, warned he acting commander of the Revolutionary Guard .", + "length": 139 + }, + { + "text": "The European Union is currently imposing new sanctions against Iran in a bid to force the country to come clean about its nuclear programme.", + "length": 140 + }, + { + "text": "Irib was targeted in a round of EU sanctions against Iran adopted in March after European officials said its broadcasts violated human rights.", + "length": 142 + }, + { + "text": "The acting commander of Iran's Revolutionary Guard has warned that the country is prepared for 'global battles' if its nuclear sites are attacked.", + "length": 146 + }, + { + "text": "Iran's state-run Press TV said Irib could take legal action against Eutelsat over the cutoff ‘to compensate for any material and spiritual damages.", + "length": 149 + }, + { + "text": "The EU ministers also agreed to prohibit all transactions between EU and Iranian banks unless they were authorised in advance for humanitarian reasons.", + "length": 151 + }, + { + "text": "’ Maryam Rajavi, president-elect of the Iranian Resistance, an organisation seeking to oust Ahmadinejad’s regime, welcomed the decision to expand sanctions.", + "length": 160 + }, + { + "text": "Defiance: A protest against the Iranian Revolutionary Guards last month in Lebanon, which Gen Hossein warned could be drawn in to any future clash with Israel .", + "length": 160 + }, + { + "text": "Both are vital to the Iranian oil 
industry, the main source of revenue for the government, and are growing increasingly important in recent months as the EU and U.", + "length": 163 + }, + { + "text": "Financial restrictions: The EU's foreign ministers hope that the new sanctions against Iranian trade and freezing of assets will hit the Islamic nation's treasury .", + "length": 164 + }, + { + "text": "Suspected site: Lavizan Shiyan, a restricted area next to a military complex in a Tehran suburb, was thought to be a possible location for weapons of mass destruction .", + "length": 168 + }, + { + "text": "She called the move ‘an essential step to preclude this regime from acquiring nuclear weapons,’ and asked the EU to sever all economic and commercial ties with Iran.", + "length": 169 + }, + { + "text": "No transparency: Iranian President Mahmoud Ahmadinejad is still refusing to come clean about the country's nuclear programme insisting it is intended for peaceful purposes .", + "length": 173 + }, + { + "text": "Inflammatory words: Gen Salami made his comments on the sidelines of combat drills by paramilitary fighters controlled by the Revolutionary Guard, known as Basiji, pictured .", + "length": 174 + }, + { + "text": "Worry: Both German Foreign Minister Guido Westerwelle, left, and Britain's William Hague spoke positively about the sanctions imposed on Iran at Monday's EU meeting in Luxembourg .", + "length": 180 + }, + { + "text": "On his way into Monday's meeting, British Foreign Secretary William Hague said new sanctions would be ‘a sign of our resolve in the European Union that we will step up the pressure.", + "length": 183 + }, + { + "text": "A meeting of EU foreign ministers on Monday imposed restrictive measures intended to hit the country's treasury and increase pressure on its Islamic regime headed by President Mahmoud Ahmadinejad.", + "length": 196 + }, + { + "text": "In a statement, Press TV said the move ‘shows that the European Union does not respect freedom of speech and is a step 
to mute all alternative news outlets representing the voice of the voiceless.", + "length": 198 + }, + { + "text": "They tightened restrictions on the Central Bank of Iran and imposed more export restrictions ‘notably for graphite, metals, software for industrial purposes, as well as measures related to the shipbuilding industry.", + "length": 217 + }, + { + "text": "Satellite provider Eutelsat agreed with media services company Arqiva to block Irib's nine TV channels and ten radio stations as of Monday morning because of ‘reinforced EU council sanctions,’ Eutelsat spokeswoman Vanessa O'Connor said.", + "length": 240 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6953611075878143 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:47.695616375Z", + "first_section_created": "2025-12-23T09:35:47.695939487Z", + "last_section_published": "2025-12-23T09:35:47.696260599Z", + "all_results_received": "2025-12-23T09:35:47.795293725Z", + "output_generated": "2025-12-23T09:35:47.795538935Z", + "total_processing_time_ms": 99, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 99, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:47.695939487Z", + "publish_time": "2025-12-23T09:35:47.696129394Z", + "first_worker_start": "2025-12-23T09:35:47.696691916Z", + "last_worker_end": "2025-12-23T09:35:47.793961Z", + "total_journey_time_ms": 98, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:47.696614613Z", + "start_time": "2025-12-23T09:35:47.696691916Z", + "end_time": "2025-12-23T09:35:47.696777519Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:47.696819Z", + "start_time": "2025-12-23T09:35:47.696949Z", + "end_time": "2025-12-23T09:35:47.793961Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 97 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:47.696737718Z", + "start_time": "2025-12-23T09:35:47.69680342Z", + "end_time": "2025-12-23T09:35:47.696934625Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:47.696615413Z", + "start_time": "2025-12-23T09:35:47.696698216Z", + "end_time": "2025-12-23T09:35:47.696776019Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:47.696199397Z", + "publish_time": "2025-12-23T09:35:47.696260599Z", + "first_worker_start": "2025-12-23T09:35:47.696695316Z", + "last_worker_end": "2025-12-23T09:35:47.770645Z", + "total_journey_time_ms": 74, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:47.696727217Z", + "start_time": "2025-12-23T09:35:47.696765019Z", + "end_time": "2025-12-23T09:35:47.69679782Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:47.696913Z", + "start_time": "2025-12-23T09:35:47.697054Z", + "end_time": "2025-12-23T09:35:47.770645Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 73 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:47.696725317Z", + "start_time": "2025-12-23T09:35:47.69678172Z", + "end_time": "2025-12-23T09:35:47.696824321Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:47.696660115Z", + "start_time": "2025-12-23T09:35:47.696695316Z", + "end_time": "2025-12-23T09:35:47.696710717Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + 
"max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 170, + "min_processing_ms": 73, + "max_processing_ms": 97, + "avg_processing_ms": 85, + "median_processing_ms": 97, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3263, + "slowest_section_id": 0, + "slowest_section_time_ms": 98 + } +} diff --git a/data/output/006711355819fedac864c1fd2f0156c57fbd6310.json b/data/output/006711355819fedac864c1fd2f0156c57fbd6310.json new file mode 100644 index 0000000..5927451 --- /dev/null +++ b/data/output/006711355819fedac864c1fd2f0156c57fbd6310.json @@ -0,0 +1,238 @@ +{ + "file_name": "006711355819fedac864c1fd2f0156c57fbd6310.txt", + "total_words": 558, + "top_n_words": [ + { + "word": "of", + "count": 21 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "the", + "count": 16 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "that", + "count": 11 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "drinking", + "count": 9 + }, + { + "word": "they", + "count": 9 + }, + { + "word": "alcohol", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "that this is a public health 
issue that needs to be addressed.", + "length": 62 + }, + { + "text": "The findings appear in the American Journal of Geriatric Psychiatry.", + "length": 68 + }, + { + "text": "'This isn’t to say that people need to abstain from alcohol altogether.", + "length": 73 + }, + { + "text": "Drinking too much in middle age can lead to memory loss in later life, a study has found.", + "length": 89 + }, + { + "text": "They found that a history of problem drinking more than doubled the risk of developing severe memory impairment.", + "length": 113 + }, + { + "text": "'But this is only one part of the puzzle and we know little about the consequences of alcohol consumption earlier in life.", + "length": 122 + }, + { + "text": "Researchers found that those with a history of problem drinking in middle-age more than doubled their risk of memory loss in later life .", + "length": 137 + }, + { + "text": "'However, there’s also a hidden cost of alcohol abuse given the mounting evidence that alcohol abuse can also impact on cognition later in life.", + "length": 146 + }, + { + "text": "Scientists questioned 6,542 American middle-adults about their past alcohol consumption and assessed their mental abilities over a period of eight years.", + "length": 153 + }, + { + "text": "As well as eating a healthy diet, not smoking and maintaining a healthy weight, the odd glass of red wine could even help reduce your risk of developing dementia.", + "length": 162 + }, + { + "text": "' He added: 'This finding - that middle-aged people with a history of problem drinking more than double their chances of memory impairment when they are older - suggests...", + "length": 172 + }, + { + "text": "'What we did here is investigate the relatively unknown association between having a drinking problem at any point in life and experiencing problems with memory later in life.", + "length": 175 + }, + { + "text": "Dr Doug Brown, director of research and development at the Alzheimer’s Society charity, said: 
'When we talk about drinking too much, the media often focuses on young people ending up in A\u0026E after a night out.", + "length": 210 + }, + { + "text": "'Although studies such as this one can be very useful for observing health trends, it’s important to note that they are not able to show cause and effect, and it’s not clear whether other factors may also have influenced these results.", + "length": 239 + }, + { + "text": "' Dr Eric Karran, science director at Alzheimer’s Research UK, said: 'One strength of this study is its long time period: as dementia develops slowly over a number of years it’s crucial to understand what factors could affect our risk of the condition earlier in life.", + "length": 272 + }, + { + "text": "Lead researcher Dr Iain Lang, from the University of Exeter Medical School, said: 'We already know there is an association between dementia risk and levels of current alcohol consumption - that understanding is based on asking older people how much they drink and then observing whether they develop problems.", + "length": 309 + }, + { + "text": "' Participants were asked four specific questions: had they ever felt they should cut down on their drinking, had people annoyed them by criticising their drinking, had they ever felt bad or guilty about their drinking, and had they ever had a drink first thing in the morning to steady nerves or get rid of a hangover.", + "length": 319 + }, + { + "text": "Researchers said concerns over alcohol abuse often focused on the short-term effects of people ending up in A\u0026E after a night out, while the long-term effects were ignored (file image) 'This small study shows that people who admitted to alcohol abuse at some point in their lives were twice as likely to have severe memory problems, and as the research relied on self-reporting that number may be even higher.", + "length": 409 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5054916739463806 + }, + "name_replaced": false, + 
"performance": { + "file_detection_time": "2025-12-23T09:35:48.197039344Z", + "first_section_created": "2025-12-23T09:35:48.197366457Z", + "last_section_published": "2025-12-23T09:35:48.197574965Z", + "all_results_received": "2025-12-23T09:35:48.25439146Z", + "output_generated": "2025-12-23T09:35:48.254563866Z", + "total_processing_time_ms": 57, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 56, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:48.197366457Z", + "publish_time": "2025-12-23T09:35:48.197574965Z", + "first_worker_start": "2025-12-23T09:35:48.198135287Z", + "last_worker_end": "2025-12-23T09:35:48.253513Z", + "total_journey_time_ms": 56, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:48.198103185Z", + "start_time": "2025-12-23T09:35:48.198173488Z", + "end_time": "2025-12-23T09:35:48.198238791Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:48.198372Z", + "start_time": "2025-12-23T09:35:48.198516Z", + "end_time": "2025-12-23T09:35:48.253513Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 54 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:48.198066584Z", + "start_time": "2025-12-23T09:35:48.198135287Z", + "end_time": "2025-12-23T09:35:48.198201789Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:48.198140287Z", + "start_time": "2025-12-23T09:35:48.198203389Z", + "end_time": "2025-12-23T09:35:48.19822889Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 54, + "min_processing_ms": 54, + "max_processing_ms": 54, + "avg_processing_ms": 54, + "median_processing_ms": 54, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3272, + "slowest_section_id": 0, + "slowest_section_time_ms": 56 + } +} diff --git a/data/output/00672d5c2055608b747a90a0f5ae32c5b340173e.json b/data/output/00672d5c2055608b747a90a0f5ae32c5b340173e.json new file mode 100644 index 0000000..d265165 --- /dev/null +++ b/data/output/00672d5c2055608b747a90a0f5ae32c5b340173e.json @@ -0,0 +1,354 @@ +{ + "file_name": "00672d5c2055608b747a90a0f5ae32c5b340173e.txt", + "total_words": 479, + "top_n_words": [ + { + "word": "the", + "count": 27 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "what", + "count": 12 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "is", + "count": 9 + }, + { + "word": "s", + "count": 8 + }, + { + "word": "that", + "count": 8 + }, + { + "word": "you", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "8.", + "length": 2 + }, + { + "text": "4.", + "length": 2 + }, + { + "text": "3.", + "length": 2 + }, + { + "text": 
"6.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "2.", + "length": 2 + }, + { + "text": "9.", + "length": 2 + }, + { + "text": "7.", + "length": 2 + }, + { + "text": "5.", + "length": 2 + }, + { + "text": "1.", + "length": 2 + }, + { + "text": "10.", + "length": 3 + }, + { + "text": "S.?", + "length": 3 + }, + { + "text": "military?", + "length": 9 + }, + { + "text": "FEEDBACK .", + "length": 10 + }, + { + "text": "TRANSCRIPT .", + "length": 12 + }, + { + "text": "What major U.", + "length": 13 + }, + { + "text": "DAILY CURRICULUM .", + "length": 18 + }, + { + "text": "president before Congress?", + "length": 26 + }, + { + "text": "Media Literacy Question of the Day: .", + "length": 37 + }, + { + "text": "Thank you for using CNN Student News!", + "length": 37 + }, + { + "text": "(CNN Student News) -- January 31, 2014 .", + "length": 40 + }, + { + "text": "Write your answers in the space provided.", + "length": 41 + }, + { + "text": "Click here to submit your Roll Call request.", + "length": 44 + }, + { + "text": "What war-torn country's largest city is Aleppo?", + "length": 47 + }, + { + "text": "What is the title of the annual speech given by the U.", + "length": 54 + }, + { + "text": "We're looking for your feedback about CNN Student News.", + "length": 55 + }, + { + "text": "What organization oversees most college sports in the U.", + "length": 56 + }, + { + "text": "What number is represented by the Roman numerals XLVIII?", + "length": 56 + }, + { + "text": "Also, feel free to tell us how you use them in your classroom.", + "length": 62 + }, + { + "text": "Click here for a printable version of the Daily Curriculum (PDF).", + "length": 65 + }, + { + "text": "What animal is associated with the Chinese New Year that begins today?", + "length": 70 + }, + { + "text": "Click here to access the transcript of today's CNN Student News program.", + "length": 72 + }, + { + "text": "What term refers to markets of smaller countries that are 
starting to grow?", + "length": 75 + }, + { + "text": "The Rangers are a special operations unit associated with what branch of the U.", + "length": 79 + }, + { + "text": "The educators on our staff will monitor this page and may respond to your comments as well.", + "length": 91 + }, + { + "text": "city suffered historic transportation gridlock on its roads resulting from a snow and ice storm?", + "length": 96 + }, + { + "text": "We hope you use our free daily materials along with the program, and we welcome your feedback on them.", + "length": 102 + }, + { + "text": "If you were reporting on a cultural tradition, what elements and perspectives would you include, and why?", + "length": 105 + }, + { + "text": "Weekly Newsquiz: The following questions relate to events that were covered this week on CNN Student News.", + "length": 106 + }, + { + "text": "On this page you will find today's show Transcript, the Daily Curriculum, and a place for you to leave feedback.", + "length": 112 + }, + { + "text": "What word, from an Old French term meaning \"undertake,\" is a term for someone who organizes and manages a business?", + "length": 115 + }, + { + "text": "We'll also discuss a new legal development in the case of an accused terrorist, and we'll examine Super Bowl security.", + "length": 118 + }, + { + "text": "Please note that there may be a delay between the time when the video is available and when the transcript is published.", + "length": 120 + }, + { + "text": "Please use this page to leave us comments about today's program, including what you think about our stories and our resources.", + "length": 126 + }, + { + "text": "What country experienced a revolution on January 25, 2011 that was marked with protests and celebrations this week in Tahrir Square?", + "length": 132 + }, + { + "text": "CNN Student News is created by a team of journalists and educators who consider the Common Core State Standards, national standards in different subject areas, and state 
standards when producing the show and curriculum.", + "length": 219 + }, + { + "text": "This Friday, CNN Student News is all about journeys: the nationwide one that millions of Chinese are taking for the Lunar New Year, the harrowing one that led thousands of Atlantans to abandon their cars, and the first one that a baby polar bear took in the snow.", + "length": 263 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6070095300674438 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:48.69835911Z", + "first_section_created": "2025-12-23T09:35:48.698719924Z", + "last_section_published": "2025-12-23T09:35:48.698909331Z", + "all_results_received": "2025-12-23T09:35:48.762179775Z", + "output_generated": "2025-12-23T09:35:48.762325081Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:48.698719924Z", + "publish_time": "2025-12-23T09:35:48.698909331Z", + "first_worker_start": "2025-12-23T09:35:48.699489454Z", + "last_worker_end": "2025-12-23T09:35:48.761357Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:48.699429551Z", + "start_time": "2025-12-23T09:35:48.699489454Z", + "end_time": "2025-12-23T09:35:48.699554756Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:48.699698Z", + "start_time": "2025-12-23T09:35:48.699848Z", + "end_time": "2025-12-23T09:35:48.761357Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:48.699441052Z", + "start_time": "2025-12-23T09:35:48.699495854Z", + "end_time": "2025-12-23T09:35:48.699557756Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + 
}, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:48.699430851Z", + "start_time": "2025-12-23T09:35:48.699495054Z", + "end_time": "2025-12-23T09:35:48.699517655Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2757, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/006754d905db204ca0ab0d99ac8a2594bdcffe62.json b/data/output/006754d905db204ca0ab0d99ac8a2594bdcffe62.json new file mode 100644 index 0000000..a5e60f8 --- /dev/null +++ b/data/output/006754d905db204ca0ab0d99ac8a2594bdcffe62.json @@ -0,0 +1,584 @@ +{ + "file_name": "006754d905db204ca0ab0d99ac8a2594bdcffe62.txt", + "total_words": 1557, + "top_n_words": [ + { + "word": "the", + "count": 97 + }, + { + "word": "he", + "count": 53 + }, + { + "word": "of", + "count": 51 + }, + { + "word": "in", + "count": 50 
+ }, + { + "word": "and", + "count": 45 + }, + { + "word": "a", + "count": 37 + }, + { + "word": "to", + "count": 33 + }, + { + "word": "his", + "count": 29 + }, + { + "word": "specter", + "count": 26 + }, + { + "word": "s", + "count": 25 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "G.", + "length": 2 + }, + { + "text": "...", + "length": 3 + }, + { + "text": "Sen.", + "length": 4 + }, + { + "text": "Kennedy.", + "length": 8 + }, + { + "text": "He was 82.", + "length": 10 + }, + { + "text": "Joe Sestak.", + "length": 11 + }, + { + "text": "Former Gov.", + "length": 11 + }, + { + "text": "He was in the U.", + "length": 16 + }, + { + "text": "(CNN) -- Former U.", + "length": 18 + }, + { + "text": "Specter ran for the U.", + "length": 22 + }, + { + "text": "\"He didn't look at polls.", + "length": 25 + }, + { + "text": "registered as a Republican.", + "length": 27 + }, + { + "text": "Gerald Ford, later president.", + "length": 29 + }, + { + "text": "\"He had a prosecutorial mindset.", + "length": 32 + }, + { + "text": "\" Specter straddled right and left.", + "length": 35 + }, + { + "text": "Readers wished the best for Specter .", + "length": 37 + }, + { + "text": "He had a kind of caustic sense of humor.", + "length": 40 + }, + { + "text": "\"He always came across as kind of urbane.", + "length": 41 + }, + { + "text": "Obituaries 2012: The lives they've lived .", + "length": 42 + }, + { + "text": "Supreme Court nominees, the Penn bio says.", + "length": 42 + }, + { + "text": "He supported embryonic stem cell research.", + "length": 42 + }, + { + "text": "John Connally were caused by the same bullet.", + "length": 45 + }, + { + "text": "\"He didn't shy away from pork,\" Madonna said.", + "length": 45 + }, + { + "text": "reviled by those on both the right and the left.", + 
"length": 48 + }, + { + "text": "He lost an election for Philadelphia mayor in 1967.", + "length": 51 + }, + { + "text": "He participated in the confirmation hearings of 14 U.", + "length": 53 + }, + { + "text": "But he eventually went east for his higher education.", + "length": 53 + }, + { + "text": "He could be incredibly persuasive as an interrogator.", + "length": 53 + }, + { + "text": "CNN's Sarah Hoye in Philadelphia contributed to this report.", + "length": 60 + }, + { + "text": "He didn't track how his comments were playing out in the press.", + "length": 63 + }, + { + "text": "He served on the Warren Commission at the recommendation of Rep.", + "length": 64 + }, + { + "text": "And he was a senior member of the Senate Appropriations Committee.", + "length": 66 + }, + { + "text": "And he's remembered across the state's 67 counties for his efforts.", + "length": 67 + }, + { + "text": "\"The Republicanism in his day, it was a different kind of Republican.", + "length": 69 + }, + { + "text": "He returned to his studies and graduated from Yale Law School in 1956.", + "length": 70 + }, + { + "text": "He ran for governor but was defeated by Dick Thornburgh in the primary.", + "length": 71 + }, + { + "text": "The move gave Democrats a 60-seat filibuster-proof majority in the Senate.", + "length": 74 + }, + { + "text": "Senate in 1976, but he was defeated in the Republican primary by John Heinz.", + "length": 76 + }, + { + "text": "\"No member of Congress shaped the Supreme Court more than he did,\" Madonna said.", + "length": 80 + }, + { + "text": "Decades after he switched to the Republican Party, he changed his stripes again.", + "length": 80 + }, + { + "text": "\"There isn't any doubt in many respects he was an unusual politician,\" Madonna said.", + "length": 84 + }, + { + "text": "\" And at the White House, President Barack Obama said Specter \"was always a fighter.", + "length": 84 + }, + { + "text": "But he won his bid for Senate in 1980 and distinguished 
himself, serving until 2011.", + "length": 84 + }, + { + "text": "But in 2010, when he ran for re-election, Specter lost the Democratic primary to Rep.", + "length": 85 + }, + { + "text": "He served as chairman of the Senate Select Committee on Intelligence from 1995 to 1997.", + "length": 87 + }, + { + "text": "He served on the Senate Judiciary Committee, of which he was chairman from 2005 to 2007.", + "length": 88 + }, + { + "text": "After Yale, he started practicing law and became an assistant district attorney in Philadelphia.", + "length": 96 + }, + { + "text": "and under-developed countries,\" according to a bio from the University of Pennsylvania Law School.", + "length": 98 + }, + { + "text": "He was a Philadelphian, and not into that staunchly conservative Republicanism that we see\" today.", + "length": 98 + }, + { + "text": "In 2006, Philadelphia magazine called him \"one of the few true wild cards of Washington politics ...", + "length": 100 + }, + { + "text": "After graduating from Russell High School in 1947, Specter first went to the University of Oklahoma.", + "length": 100 + }, + { + "text": "He criticized Republicans for President Bill Clinton's impeachment and voted in favor of the Iraq war.", + "length": 102 + }, + { + "text": "Specter died of complications from non-Hodgkin's lymphoma at his home in Philadelphia, his family said.", + "length": 103 + }, + { + "text": "\"The last thing you would have thought about Arlen Specter was that he was born in Kansas,\" Madonna said.", + "length": 105 + }, + { + "text": "\" The senator practiced law when he wasn't in office and authored books throughout his career, including: .", + "length": 107 + }, + { + "text": "He became a Democrat in 2009, saying Republicans had moved too far to the right and embraced social conservatism.", + "length": 113 + }, + { + "text": "\" Biden will travel to Penn Valley, Pennsylvania, on Tuesday for Specter's funeral, according to the White House.", + "length": 113 + }, + { + 
"text": "He had a few election losses but he was undeterred by defeat, the prospects of losing and the challenges he faced.", + "length": 114 + }, + { + "text": "During the 1990s, he briefly announced a run for president but eventually dropped the effort and endorsed Bob Dole.", + "length": 115 + }, + { + "text": "Sestak, who went on to lose the race to Toomey, praised Specter via Twitter as \"a warrior of inestimable public service.", + "length": 120 + }, + { + "text": "He served as district attorney until 1974 and prosecuted corruption cases against Philadelphia magistrates and Teamsters.", + "length": 121 + }, + { + "text": "\" But Specter in fact was born in Wichita, the youngest child of Lillie Shanin and Harry Specter, an immigrant from Ukraine.", + "length": 124 + }, + { + "text": "Like many of his moderate compatriots, he came to be viewed by the new conservatives as a RINO -- a Republican in Name Only.", + "length": 124 + }, + { + "text": "The veteran Pennsylvania politician had overcome numerous serious illnesses over the past two decades, including a brain tumor.", + "length": 127 + }, + { + "text": "Specter was elected to the Senate in 1980 and represented Pennsylvania for 30 years, longer than anyone in the state's history.", + "length": 127 + }, + { + "text": "He grew up in Russell, Kansas, also the hometown of another Republican icon, a one-time presidential nominee and senator, Bob Dole.", + "length": 131 + }, + { + "text": "Despite his longtime membership in the Republican Party, Specter became more alienated from the party as it grew more conservative.", + "length": 131 + }, + { + "text": "Air Force during the Korean War from 1951 to 1953, serving as a second lieutenant in the Air Force Office of Special Investigations.", + "length": 132 + }, + { + "text": "Madonna called Specter an \"indefatigable\" public figure, highly demanding of both himself and those who worked for him over the years.", + "length": 134 + }, + { + "text": "\"Arlen's 
knowledge of the inner workings of the government and lawmaking is second to none,\" said Michael Fitts, the law school's dean.", + "length": 135 + }, + { + "text": "Specter is credited with co-authoring the \"single bullet theory,\" which suggested that some of the wounds to Kennedy and then-Texas Gov.", + "length": 136 + }, + { + "text": "He was also one of America's most prominent Jewish politicians, a rare Republican in a category dominated by Democrats over the decades.", + "length": 136 + }, + { + "text": "\"He was fundamentally a pragmatist who could bend with the times,\" Madonna said, and he believed greatly that government could help people.", + "length": 139 + }, + { + "text": "He had been in the public eye since serving as a member of the Warren Commission, which investigated the assassination of President John F.", + "length": 139 + }, + { + "text": "\" And Pat Toomey, the Republican who now holds Specter's old Senate seat, praised him as \"a man of sharp intelligence and dogged determination.", + "length": 143 + }, + { + "text": "His politically moderate image fit hand-in-glove in the politically blue Northeast, both with its Democratic centrists and its liberal Republicans.", + "length": 147 + }, + { + "text": "\" \"Charming and churlish, brilliant and pedantic, he can be fiercely independent, entertainingly eccentric and simply maddening,\" the profile read.", + "length": 147 + }, + { + "text": "And Vice President Joe Biden lamented the loss of \"my friend,\" \"who never walked away from his principles and was at his best when they were challenged.", + "length": 152 + }, + { + "text": "He earned a bachelor's degree in international relations in 1951 from the University of Pennsylvania, in Philadelphia, where he graduated Phi Beta Kappa.", + "length": 153 + }, + { + "text": "He is remembered for leading the charge against conservative nominee Robert Bork and going after Anita Hill, who accused nominee Clarence Thomas of harassment.", + "length": 159 
+ }, + { + "text": "\" After the loss, Specter moved from the halls of Congress to those of academia, taking on a new role at the University of Pennsylvania Law School as an adjunct professor.", + "length": 171 + }, + { + "text": "Specter brought more financial resources to Pennsylvania than anyone in the state, working with mayors and other local leaders to help them get grants and aid, Madonna said.", + "length": 173 + }, + { + "text": "And his name is synonymous with Pennsylvania, an idiosyncratic state that pushes and pulls between the two parties, and his home, the staunchly Democratic city of Philadelphia.", + "length": 176 + }, + { + "text": "Terry Madonna, director of the Franklin \u0026 Marshall College Poll and professor of pubic affairs at Franklin \u0026 Marshall College, said Frank Sinatra's song \"My Way\" could apply to Specter.", + "length": 185 + }, + { + "text": "\"The insight he brings from his career in public service, particularly as a leader on judicial issues, will be invaluable to our students as they prepare for their own careers in the law.", + "length": 187 + }, + { + "text": "Ed Rendell, a Democrat, called Specter \"a mentor, colleague and a political institution\" who \"did more for the people of Pennsylvania over his more than 30-year career with the possible exception of Benjamin Franklin.", + "length": 217 + }, + { + "text": "Arlen Specter, who embodied a vanishing breed of liberal Republicanism before switching to the Democratic Party at the twilight of his political career, died Sunday after a long battle with cancer, his family announced.", + "length": 219 + }, + { + "text": "\"During his tenure in the Senate, Specter championed Pennsylvania's economy and took an active interest in foreign affairs, meeting with dozens of world leaders as well as supporting appropriations to fight the global HIV/AIDS pandemic and backing free trade agreements between the U.", + "length": 284 + }, + { + "text": "\" \"From his days stamping out 
corruption as a prosecutor in Philadelphia to his three decades of service in the Senate, Arlen was fiercely independent -- never putting party or ideology ahead of the people he was chosen to serve,\" Obama said in a written statement on Specter's death.", + "length": 284 + }, + { + "text": "\"From his role in sinking Robert Bork's Supreme Court nomination to his cross-examination of Anita Hill, from stem-cell research to the impeachment of Bill Clinton, Specter's greatest talent may be his unique ability to put himself -- somehow, some way -- in the center of the nation's most important debates,\" the article said.", + "length": 328 + }, + { + "text": "-- \"Passion for Truth: From Finding JFK's Single Bullet to Questioning Anita Hill to Impeaching Clinton\" -- \"Never Give In: Battling Cancer in the Senate\" -- \"Life Among the Cannibals: A Political Career, a Tea Party Uprising, and the End of Governing As We Know It\" \"For the past quarter-century, he's also been a Zelig-like national figure,\" the Philadelphia magazine article said, referring to the Woody Allen character from the film of the same name who changed his persona as his surroundings and circumstances changed.", + "length": 524 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6649226546287537 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:49.199695877Z", + "first_section_created": "2025-12-23T09:35:49.20003839Z", + "last_section_published": "2025-12-23T09:35:49.200632313Z", + "all_results_received": "2025-12-23T09:35:49.301045192Z", + "output_generated": "2025-12-23T09:35:49.301359304Z", + "total_processing_time_ms": 101, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 100, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:49.20003839Z", + "publish_time": "2025-12-23T09:35:49.200319101Z", + "first_worker_start": "2025-12-23T09:35:49.20082022Z", + 
"last_worker_end": "2025-12-23T09:35:49.300162Z", + "total_journey_time_ms": 100, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:49.200732617Z", + "start_time": "2025-12-23T09:35:49.20082022Z", + "end_time": "2025-12-23T09:35:49.200932724Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:49.201194Z", + "start_time": "2025-12-23T09:35:49.201344Z", + "end_time": "2025-12-23T09:35:49.300162Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 98 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:49.200857521Z", + "start_time": "2025-12-23T09:35:49.200945625Z", + "end_time": "2025-12-23T09:35:49.20107463Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:49.200852621Z", + "start_time": "2025-12-23T09:35:49.200947225Z", + "end_time": "2025-12-23T09:35:49.200999327Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:49.200383203Z", + "publish_time": "2025-12-23T09:35:49.200632313Z", + "first_worker_start": "2025-12-23T09:35:49.201015828Z", + "last_worker_end": "2025-12-23T09:35:49.286401Z", + "total_journey_time_ms": 86, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:49.200970126Z", + "start_time": "2025-12-23T09:35:49.201015828Z", + "end_time": "2025-12-23T09:35:49.201126832Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:49.201416Z", + "start_time": "2025-12-23T09:35:49.201591Z", + "end_time": "2025-12-23T09:35:49.286401Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 84 + }, + "topn": { + "worker_type": "topn", + "receive_time": 
"2025-12-23T09:35:49.20108843Z", + "start_time": "2025-12-23T09:35:49.201142532Z", + "end_time": "2025-12-23T09:35:49.201293138Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:49.201115731Z", + "start_time": "2025-12-23T09:35:49.201175734Z", + "end_time": "2025-12-23T09:35:49.201211735Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 182, + "min_processing_ms": 84, + "max_processing_ms": 98, + "avg_processing_ms": 91, + "median_processing_ms": 98, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4703, + "slowest_section_id": 0, + "slowest_section_time_ms": 100 + } +} diff --git a/data/output/00675b074ea1ab054dc2158d7c228c74b42eb1f3.json b/data/output/00675b074ea1ab054dc2158d7c228c74b42eb1f3.json new file mode 100644 index 0000000..5122159 --- /dev/null +++ b/data/output/00675b074ea1ab054dc2158d7c228c74b42eb1f3.json @@ -0,0 +1,302 @@ +{ + "file_name": "00675b074ea1ab054dc2158d7c228c74b42eb1f3.txt", + 
"total_words": 676, + "top_n_words": [ + { + "word": "i", + "count": 25 + }, + { + "word": "he", + "count": 20 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "the", + "count": 18 + }, + { + "word": "to", + "count": 18 + }, + { + "word": "was", + "count": 18 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "his", + "count": 14 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "sean", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Katy Winter .", + "length": 13 + }, + { + "text": "I lived on junk food.", + "length": 21 + }, + { + "text": "'I couldn’t believe it.", + "length": 25 + }, + { + "text": "'I’m really proud of him.", + "length": 27 + }, + { + "text": "'It was fast food all the time.", + "length": 31 + }, + { + "text": "My wife ate normally but I would just eat rubbish.", + "length": 50 + }, + { + "text": "He said: 'I would lose weight and then put more on.", + "length": 51 + }, + { + "text": "'I would have a chocolate bar even before breakfast.", + "length": 52 + }, + { + "text": "Gregg’s pies, curries and fish suppers - that was my diet.", + "length": 60 + }, + { + "text": "I walked out of the hospital and realised it was time to change.", + "length": 64 + }, + { + "text": "'I was frightened for him, but it’s completely changed his life.", + "length": 66 + }, + { + "text": "'I wanted to make myself better and add some more years to my life.", + "length": 67 + }, + { + "text": "'We had even considered a gastric band until he started to lose the weight.", + "length": 75 + }, + { + "text": "The doctor had warned him he wouldn’t see 50 if he didn’t make a change.", + "length": 76 + }, + { + "text": "Before I started my diet I couldn’t walk more than 20 yards but now I have so much energy.", + "length": 92 + }, + { + "text": "But he admitted standing alongside fit soldiers was 'a bit embarrassing' when he was at his heaviest.", + "length": 101 + }, + { + "text": "He also revealed 
it was three weeks before he could get onto a regular set of scales, because he was too big.", + "length": 109 + }, + { + "text": "As well as his work as a HGV instructor, Sean now runs Fit Factor, at Signatures Studio, in North Lanarkshire.", + "length": 110 + }, + { + "text": "Sean's truckstop diet of burgers, fish suppers and chocolate bar snacks had led to his weigh increasing to 24st .", + "length": 113 + }, + { + "text": "Now the HGV driving instructor, from Bellshill, North Lanarkshire, weighs a trim 13st 8lb and runs 7km every day.", + "length": 113 + }, + { + "text": "Medics told Sean he might have sleep apnoea - a constriction of the airwaves caused by the fat around his throat .", + "length": 114 + }, + { + "text": "Sean, 48, tipped the scales at 24st after an unhealthy truckstop diet of burgers, fish suppers and chocolate bars.", + "length": 114 + }, + { + "text": "He said: 'I snore when I sleep and my wife noticed that I would stop breathing when I slept so we went to get it checked out.", + "length": 125 + }, + { + "text": "Sean dropped half his body weight by using meal replacement drinks for breakfast and lunch and eating a healthy evening meal .", + "length": 126 + }, + { + "text": "Being told that his size was not only effecting his health but jeopardizing his career was the prompt Sean needed to lose weight .", + "length": 130 + }, + { + "text": "Heavyweight trucker Sean Paterson shed more than half his body weight - after doctors told him he was becoming too unhealthy to drive.", + "length": 134 + }, + { + "text": "'I faced losing my livelihood, my health, because of my weight around my neck it was making it difficult for me to breathe when I lay flat.", + "length": 139 + }, + { + "text": "'I went to the consultant at Stobhill Hospital and he warned me that they would have to contact the DVLA if it was confirmed I had sleep apnoea.", + "length": 144 + }, + { + "text": "His wife Ann, 49, a senior support worker, said: 'It was quite scary - I 
would wake up and there would be 30 to 40 seconds when he wasn’t breathing.", + "length": 150 + }, + { + "text": "Sean said: 'After I started the diet, I was too heavy in the first three weeks, so I had to go in and check my weight on one of those big scales in Boots.", + "length": 154 + }, + { + "text": "But he lost over 10st after he was warned he could have sleep apnoea - a medical condition brought on by his weight - and could face losing his driving licence.", + "length": 160 + }, + { + "text": "'The other week we were out for dinner and an old friend didn’t recognise him because of how much weight he had lost - she thought I was with a new boyfriend.", + "length": 160 + }, + { + "text": "Sean began to lose weight using Herbalife last year, a nutritional supplement, which sees him drink two shakes for breakfast and lunch, with a main meal in the evening of chicken and veg.", + "length": 187 + }, + { + "text": "' Sean’s driving career almost hit the skids when medics told him he might have sleep apnoea - a constriction of the airwaves caused by the fat around his throat, which would have made him illegible for an HGV licence.", + "length": 220 + }, + { + "text": "' Sean’s job as an HGV driving instructor meant he trained emergency service workers and was one of six instructors sent to Arbroath to train Royal Commandos in off-road driving before they were deployed to Afghanistan.", + "length": 221 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7813835740089417 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:49.701451759Z", + "first_section_created": "2025-12-23T09:35:49.701788772Z", + "last_section_published": "2025-12-23T09:35:49.70199468Z", + "all_results_received": "2025-12-23T09:35:49.770536928Z", + "output_generated": "2025-12-23T09:35:49.770698134Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + 
"section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:49.701788772Z", + "publish_time": "2025-12-23T09:35:49.70199468Z", + "first_worker_start": "2025-12-23T09:35:49.702427897Z", + "last_worker_end": "2025-12-23T09:35:49.769625Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:49.702419997Z", + "start_time": "2025-12-23T09:35:49.702485799Z", + "end_time": "2025-12-23T09:35:49.702557002Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:49.702619Z", + "start_time": "2025-12-23T09:35:49.702761Z", + "end_time": "2025-12-23T09:35:49.769625Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:49.702380395Z", + "start_time": "2025-12-23T09:35:49.702427897Z", + "end_time": "2025-12-23T09:35:49.7025207Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:49.702380695Z", + "start_time": "2025-12-23T09:35:49.702449798Z", + "end_time": "2025-12-23T09:35:49.702481999Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 
0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3632, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/00678bb443ec2449de6787830d959fe2a423a6c8.json b/data/output/00678bb443ec2449de6787830d959fe2a423a6c8.json new file mode 100644 index 0000000..bc86d82 --- /dev/null +++ b/data/output/00678bb443ec2449de6787830d959fe2a423a6c8.json @@ -0,0 +1,234 @@ +{ + "file_name": "00678bb443ec2449de6787830d959fe2a423a6c8.txt", + "total_words": 341, + "top_n_words": [ + { + "word": "the", + "count": 16 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "albanese", + "count": 7 + }, + { + "word": "be", + "count": 7 + }, + { + "word": "for", + "count": 7 + }, + { + "word": "and", + "count": 6 + }, + { + "word": "mr", + "count": 6 + }, + { + "word": "laws", + "count": 5 + }, + { + "word": "on", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "'Sometimes things might have gone too far.", + "length": 42 + }, + { + "text": "If that is the case they should be wound back.", + "length": 46 + }, + { + "text": "'I'm concerned about the rights of journalists.", + "length": 47 + }, + { + "text": "' Mr Albanese's concerns may be too little too late.", + "length": 52 + }, + { + "text": "Tony Abbott and Parliament beefed up Australia's terror laws this month .", + "length": 73 + }, + { + "text": "'We should be arguing for more scrutiny of these issues,' Mr Albanese said.", + "length": 75 + }, + { + "text": "Journalists will face 10 years in jail if they reveal details of intelligence operations.", + 
"length": 89 + }, + { + "text": "Tony Abbott's Parliament beefed up the domestic spy agency ASIO's powers earlier this month.", + "length": 92 + }, + { + "text": "Mr Albanese said he was speaking for himself and not Oppostion Leader Bill Shorten in his comments .", + "length": 101 + }, + { + "text": "Lobor backbencher Melissa Parke was the lone voice of dissent in Parliament when the new laws went through .", + "length": 108 + }, + { + "text": "Security agencies are trying to impose things that have been in their bottom drawer for a long time, he said.", + "length": 109 + }, + { + "text": "Mr Albanese has now hit out at the lack of scrutiny over the measures, describing the penalties on the media as draconian.", + "length": 122 + }, + { + "text": "Asked if Opposition Leader Bill Shorten shared his concerns about press freedom, Mr Albanese said: 'I'm speaking for myself.", + "length": 124 + }, + { + "text": "The party's spokesperson on Infrastructure, Transport and Tourism, said there was public interest in exposing errors of security agencies.", + "length": 139 + }, + { + "text": "'You can be fully supportive of our engagement in the Middle East and still say we don't protect freedom by giving it up,' Mr Albanese told Sky News.", + "length": 149 + }, + { + "text": "Labor waved the bill through parliament, citing bipartisan support for national security, and backbencher Melissa Parke was the lone voice of dissent.", + "length": 150 + }, + { + "text": "' The second tranche of anti-terror laws cracking down on Australians who go overseas to fight alongside extremists will be debated in parliament in mid-October.", + "length": 161 + }, + { + "text": "A senior Labor frontbencher has broken ranks and says new anti-terror laws that allow journalists to be jailed for reporting on intelligence operations might have overstepped the mark.", + "length": 185 + }, + { + "text": "Labor frontbencher Anthony Albanese has spoken out saying new anti-terror laws that allow journalists 
to be jailed for reporting on intelligence operations might have overstepped the mark .", + "length": 190 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8586990833282471 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:50.203130639Z", + "first_section_created": "2025-12-23T09:35:50.203402449Z", + "last_section_published": "2025-12-23T09:35:50.203540055Z", + "all_results_received": "2025-12-23T09:35:50.265442646Z", + "output_generated": "2025-12-23T09:35:50.26555535Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:50.203402449Z", + "publish_time": "2025-12-23T09:35:50.203540055Z", + "first_worker_start": "2025-12-23T09:35:50.204106977Z", + "last_worker_end": "2025-12-23T09:35:50.264621Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:50.204054575Z", + "start_time": "2025-12-23T09:35:50.204106977Z", + "end_time": "2025-12-23T09:35:50.204148878Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:50.204237Z", + "start_time": "2025-12-23T09:35:50.204393Z", + "end_time": "2025-12-23T09:35:50.264621Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:50.204030974Z", + "start_time": "2025-12-23T09:35:50.204107577Z", + "end_time": "2025-12-23T09:35:50.204170579Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:50.204278583Z", + "start_time": "2025-12-23T09:35:50.204539193Z", + "end_time": "2025-12-23T09:35:50.204560194Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } 
+ ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2070, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/00679e502f744db997e4a0047aa3e2c91ae2d874.json b/data/output/00679e502f744db997e4a0047aa3e2c91ae2d874.json new file mode 100644 index 0000000..94f29f9 --- /dev/null +++ b/data/output/00679e502f744db997e4a0047aa3e2c91ae2d874.json @@ -0,0 +1,460 @@ +{ + "file_name": "00679e502f744db997e4a0047aa3e2c91ae2d874.txt", + "total_words": 856, + "top_n_words": [ + { + "word": "the", + "count": 66 + }, + { + "word": "of", + "count": 31 + }, + { + "word": "to", + "count": 27 + }, + { + "word": "a", + "count": 24 + }, + { + "word": "and", + "count": 23 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "diary", + "count": 15 + }, + { + "word": "rosenberg", + "count": 15 + }, + { + "word": "nazi", + "count": 14 + }, + { + "word": "s", + "count": 14 + } + ], + 
"sorted_sentences": [ + { + "text": "W.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "'A .", + "length": 4 + }, + { + "text": "'The .", + "length": 6 + }, + { + "text": "'The .", + "length": 6 + }, + { + "text": "' How .", + "length": 7 + }, + { + "text": "Further .", + "length": 9 + }, + { + "text": "Washington.", + "length": 11 + }, + { + "text": "preliminary.", + "length": 12 + }, + { + "text": "remained missing.", + "length": 17 + }, + { + "text": "to be true is unclear.", + "length": 22 + }, + { + "text": "Part of history: Robert M.", + "length": 26 + }, + { + "text": "Richardson declined to comment.", + "length": 31 + }, + { + "text": "No charges were filed in the case.", + "length": 34 + }, + { + "text": "It also includes details about the .", + "length": 36 + }, + { + "text": "of Jews and other Eastern Europeans.", + "length": 36 + }, + { + "text": "But the bulk of the diary never surfaced.", + "length": 41 + }, + { + "text": "Kempner, a Nuremberg prosecutor, was long suspected by U.", + "length": 57 + }, + { + "text": "officials of smuggling the diary back to the United States.", + "length": 59 + }, + { + "text": "number of important issues relating to the Third Reich's policy.", + "length": 64 + }, + { + "text": "A Nuremberg prosecutor, Robert Kempner, was long suspected by U.", + "length": 64 + }, + { + "text": "era, including the history of the Holocaust,' according to the .", + "length": 64 + }, + { + "text": "details about the diary's contents could not be learned, and a U.", + "length": 65 + }, + { + "text": "government official stressed that the museum's analysis remains .", + "length": 65 + }, + { + "text": "complements, and in part contradicts, already known documentation.", + "length": 66 + }, + { + "text": "They tracked the diary to Richardson, who was living near Buffalo.", + "length": 66 + }, + { + 
"text": "diary will be an important source of information to historians that .", + "length": 69 + }, + { + "text": "the writings of Rosenberg, a Nazi Reich minister who was convicted at .", + "length": 71 + }, + { + "text": "documentation is of considerable importance for the study of the Nazi .", + "length": 71 + }, + { + "text": "assessment, prepared by the United States Holocaust Memorial Museum in .", + "length": 72 + }, + { + "text": "Nuremberg and hanged in 1946, might contradict what historians believe .", + "length": 72 + }, + { + "text": "German occupation of the Soviet Union, including plans for mass killings .", + "length": 74 + }, + { + "text": "cursory content analysis indicates that the material sheds new light on a .", + "length": 75 + }, + { + "text": "officials of smuggling Rosenberg's diary out of Germany after the Nazi trials .", + "length": 79 + }, + { + "text": "Immigration and Customs Enforcement, Department of Justice and Holocaust museum.", + "length": 80 + }, + { + "text": "A government official said more details will be announced at the news conference.", + "length": 81 + }, + { + "text": "Rosenberg was an early and powerful Nazi ideologue, particularly on racial issues.", + "length": 82 + }, + { + "text": "His diary, once held by Nuremberg prosecutors as evidence, vanished after the trial.", + "length": 84 + }, + { + "text": "He directed the Nazi party's foreign affairs department and edited the Nazi newspaper.", + "length": 86 + }, + { + "text": "Several of his memos to Hitler were cited as evidence during the post-war Nuremberg trials.", + "length": 91 + }, + { + "text": "The Nazi unit created to seize such artifacts was called Task Force Reichsleiter Rosenberg.", + "length": 91 + }, + { + "text": "After the 1999 incident, the FBI opened a criminal investigation into the missing documents.", + "length": 92 + }, + { + "text": "Harrowing tales: A diary has been recovered which belonged to Hilter confidant Alfred Rosenberg .", + "length": 
97 + }, + { + "text": "Reign of terror: Hitler and his retinue, including Rosenberg, on the Koniglichen Platz in Munich .", + "length": 98 + }, + { + "text": "A long-lost diary belonging to a confidant of Adolf Hitler has been recovered, it was revealed today.", + "length": 102 + }, + { + "text": "Evil workings: Reichsleader Alfred Rosenberg (left) and Reichsminister Dr Henrich Lammers with Adolf Hitler .", + "length": 109 + }, + { + "text": "Born in Germany, Kempner had fled to America in the 1930s to escape the Nazis, only to return for post-war trials.", + "length": 114 + }, + { + "text": "He was convicted of crimes against humanity and was one of a dozen senior Nazi officials executed in October 1946.", + "length": 114 + }, + { + "text": "Rosenberg also directed the systematic Nazi looting of Jewish art, cultural and religious property throughout Europe.", + "length": 117 + }, + { + "text": "The recovery is expected to be announced this week at a news conference in Delaware held jointly by officials from the U.", + "length": 121 + }, + { + "text": "Kempner cited a few Rosenberg diary excerpts in his memoir, and in 1956 a German historian published entries from 1939 and 1940.", + "length": 128 + }, + { + "text": "Early this year, the Holocaust museum and an agent from Homeland Security Investigation tried to locate the missing diary pages.", + "length": 128 + }, + { + "text": "The diary offers a loose collection of Rosenberg's recollections from spring 1936 to winter 1944, according to the museum's analysis.", + "length": 133 + }, + { + "text": "The 400 pages belonged to Alfred Rosenberg, a high-ranking Nazi who played a central role in the extermination of millions of Jews and others during World War Two.", + "length": 163 + }, + { + "text": "Most entries are written in Rosenberg's looping cursive, some on paper torn from a ledger book and others on the back of official Nazi stationery, the analysis said.", + "length": 165 + }, + { + "text": "A preliminary 
government assessment says that the diary could offer new insight into meetings Rosenberg had with Hitler and other top Nazi leaders, including Heinrich Himmler and Herman Goering.", + "length": 194 + }, + { + "text": "When Kempner died in 1993 at age 93, legal disputes about his papers raged for nearly a decade between his children, his former secretary, a local debris removal contractor and the Holocaust museum.", + "length": 198 + }, + { + "text": "The children agreed to give their father's papers to the Holocaust museum, but when officials arrived to retrieve them from his home in 1999, they discovered that many thousands of pages were missing.", + "length": 200 + }, + { + "text": "He is credited with helping reveal the existence of the Wannsee Protocol, the 1942 conference during which Nazi officials met to coordinate the genocide against the Jews, which they termed 'The Final Solution'.", + "length": 210 + }, + { + "text": "But the Holocaust museum has gone on to recover more than 150,000 documents, including a trove held by Kempner's former secretary, who by then had moved into the New York state home of an academic named Herbert Richardson.", + "length": 222 + }, + { + "text": "Defeated: War criminals of the Nazi regime (left to right) Hermann Göring, Alfred Rosenberg, Baldur von Schirach and Karl Dönitz sit at a wooden table with metal plates and pieces of bread during the Nuremberg Trials in 1946 .", + "length": 228 + }, + { + "text": "But the diary does include details about tensions within the German high-command - in particular, the crisis caused by the flight of Rudolf Hess to Britain in 1941, and the looting of art throughout Europe, according to the preliminary analysis.", + "length": 245 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5011798739433289 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:50.7043215Z", + "first_section_created": "2025-12-23T09:35:50.704670813Z", + 
"last_section_published": "2025-12-23T09:35:50.705004226Z", + "all_results_received": "2025-12-23T09:35:50.791244957Z", + "output_generated": "2025-12-23T09:35:50.791471866Z", + "total_processing_time_ms": 87, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 86, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:50.704670813Z", + "publish_time": "2025-12-23T09:35:50.704893622Z", + "first_worker_start": "2025-12-23T09:35:50.705291437Z", + "last_worker_end": "2025-12-23T09:35:50.790335Z", + "total_journey_time_ms": 85, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:50.705384741Z", + "start_time": "2025-12-23T09:35:50.705460644Z", + "end_time": "2025-12-23T09:35:50.705580948Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:50.705854Z", + "start_time": "2025-12-23T09:35:50.705977Z", + "end_time": "2025-12-23T09:35:50.790335Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 84 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:50.705328239Z", + "start_time": "2025-12-23T09:35:50.705395741Z", + "end_time": "2025-12-23T09:35:50.705512946Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:50.705228935Z", + "start_time": "2025-12-23T09:35:50.705291437Z", + "end_time": "2025-12-23T09:35:50.705438143Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:50.704946924Z", + "publish_time": "2025-12-23T09:35:50.705004226Z", + "first_worker_start": "2025-12-23T09:35:50.705332839Z", + "last_worker_end": "2025-12-23T09:35:50.767678Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + 
"receive_time": "2025-12-23T09:35:50.705774356Z", + "start_time": "2025-12-23T09:35:50.706021365Z", + "end_time": "2025-12-23T09:35:50.706036266Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:50.705711Z", + "start_time": "2025-12-23T09:35:50.705883Z", + "end_time": "2025-12-23T09:35:50.767678Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:50.705410042Z", + "start_time": "2025-12-23T09:35:50.705440043Z", + "end_time": "2025-12-23T09:35:50.705479544Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:50.705291637Z", + "start_time": "2025-12-23T09:35:50.705332839Z", + "end_time": "2025-12-23T09:35:50.705338339Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 145, + "min_processing_ms": 61, + "max_processing_ms": 84, + "avg_processing_ms": 72, + "median_processing_ms": 84, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2661, + "slowest_section_id": 0, + "slowest_section_time_ms": 85 + } +} diff --git a/data/output/0067bce5de99c3b35129f50f932d5b71a29611aa.json b/data/output/0067bce5de99c3b35129f50f932d5b71a29611aa.json new file mode 100644 index 0000000..3a09a70 --- /dev/null +++ b/data/output/0067bce5de99c3b35129f50f932d5b71a29611aa.json @@ -0,0 +1,210 @@ +{ + "file_name": "0067bce5de99c3b35129f50f932d5b71a29611aa.txt", + "total_words": 302, + "top_n_words": [ + { + "word": "the", + "count": 15 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "an", + "count": 6 + }, + { + "word": "and", + "count": 6 + }, + { + "word": "for", + "count": 6 + }, + { + "word": "his", + "count": 6 + }, + { + "word": "in", + "count": 6 + }, + { + "word": "them", + "count": 6 + }, + { + "word": "were", + "count": 6 + }, + { + "word": "after", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "Some are living in shelters, while one has left the country.", + "length": 60 + }, + { + "text": "'He threatened to harm them if they didn't tell him everything they were doing.", + "length": 79 + }, + { + "text": "Shoja S, who pretended to be a CEO for an oil company, told a court in Marbella that the girls were his employees.", + "length": 114 + }, + { + "text": "The woman - originally from Germany, Lithuania, Kazakhstan, Denmark, Mongolia, Russia and Turkmenistan - have left the house.", + "length": 125 + }, + { + "text": "Eight others have pressed charges against him for physical and psychological abuse that allegedly left them in a state of 'terror'.", + "length": 131 + }, + { + "text": "An Iranian conman allegedly duped ten wannabe models into being his sex slaves after pretending to be an oil tycoon with an enviable list of contacts in the fashion industry.", + "length": 174 + }, + { 
+ "text": "' One of the women has now filed a lawsuit against Shoja S for allegedly forcing her family to pay the €6,500 (£5,300) rent for his mansion in Marbella where they were housed.", + "length": 178 + }, + { + "text": "The 50-year-old, known as Shoja S, fathered seven children with the women after convincing them all to live at his palace in the Spanish resort of Marbella (above) for three years .", + "length": 181 + }, + { + "text": "The European and Asian girls, aged between 19 and 24, soon realised they were being conned, but were unable to leave after allegedly being subjected to emotional and physical abuse.", + "length": 181 + }, + { + "text": "The 50-year-old, known as Shoja S, reportedly fathered seven children with the women after convincing them all to live at his Spanish palace for three years with the promise of making them catwalk stars.", + "length": 203 + }, + { + "text": "An Iranian conman duped ten wannabe models into being his sex slaves after pretending to be an oil tycoon who could make them catwalk stars (file picture posed by model) 'They were always controlled and supervised by him,' it was reported by Spanish newspaper ABC, according to The Local.", + "length": 288 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.649162769317627 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:51.205351654Z", + "first_section_created": "2025-12-23T09:35:51.205731069Z", + "last_section_published": "2025-12-23T09:35:51.205895375Z", + "all_results_received": "2025-12-23T09:35:51.270276762Z", + "output_generated": "2025-12-23T09:35:51.270439669Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:51.205731069Z", + "publish_time": "2025-12-23T09:35:51.205895375Z", + "first_worker_start": "2025-12-23T09:35:51.206422896Z", + 
"last_worker_end": "2025-12-23T09:35:51.269372Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:51.206387794Z", + "start_time": "2025-12-23T09:35:51.206447097Z", + "end_time": "2025-12-23T09:35:51.206482798Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:51.206639Z", + "start_time": "2025-12-23T09:35:51.206785Z", + "end_time": "2025-12-23T09:35:51.269372Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:51.206382594Z", + "start_time": "2025-12-23T09:35:51.206449297Z", + "end_time": "2025-12-23T09:35:51.206493198Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:51.206361693Z", + "start_time": "2025-12-23T09:35:51.206422896Z", + "end_time": "2025-12-23T09:35:51.206442096Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 
1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1750, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/0067c471ca8e3b37ba5634fb480c1a53117c49b5.json b/data/output/0067c471ca8e3b37ba5634fb480c1a53117c49b5.json new file mode 100644 index 0000000..af9d28e --- /dev/null +++ b/data/output/0067c471ca8e3b37ba5634fb480c1a53117c49b5.json @@ -0,0 +1,404 @@ +{ + "file_name": "0067c471ca8e3b37ba5634fb480c1a53117c49b5.txt", + "total_words": 1154, + "top_n_words": [ + { + "word": "the", + "count": 53 + }, + { + "word": "and", + "count": 36 + }, + { + "word": "to", + "count": 35 + }, + { + "word": "he", + "count": 34 + }, + { + "word": "i", + "count": 31 + }, + { + "word": "a", + "count": 27 + }, + { + "word": "him", + "count": 21 + }, + { + "word": "in", + "count": 21 + }, + { + "word": "was", + "count": 18 + }, + { + "word": "for", + "count": 17 + } + ], + "sorted_sentences": [ + { + "text": "Dawson.", + "length": 7 + }, + { + "text": "'I had more or less looked after him.", + "length": 37 + }, + { + "text": "I feel like I've been taken for granted.", + "length": 40 + }, + { + "text": "We want to punish you and protect the public.", + "length": 45 + }, + { + "text": "After he was barred Mr Dawson took pity on him.", + "length": 47 + }, + { + "text": "'Ken is a very proud man and this has upset him.", + "length": 48 + }, + { + "text": "I thought he was a friend and as a friend I always paid.", + "length": 56 + }, + { + "text": "These days people do not have a kind word to say about him.", + "length": 59 + }, + { + "text": "'I don't think he is bothered or sorry for what he has done.", + "length": 60 + }, + { + "text": "'I am just pleased that he got sent down - it serves him right.", + "length": 63 + }, + { + "text": "In court he didn't 
even have the decency to look at me in the eye.", + "length": 66 + }, + { + "text": "The pair would go shopping together and on day trips to Blackpool.", + "length": 66 + }, + { + "text": "Should the defendant get gainful employment, he will repay him in full.", + "length": 71 + }, + { + "text": "'Everyone makes mistakes and I even tried to get him back into the club.", + "length": 72 + }, + { + "text": "Mr Dawson would lend Youngs money and even lent him £3,000 to buy a car.", + "length": 73 + }, + { + "text": "Paul used to be a good lad at one time and used to be the Bingo caller here.", + "length": 76 + }, + { + "text": "'It is a lot of money and my life savings and pensions I had worked hard for.", + "length": 77 + }, + { + "text": "' He admitted theft between March 29 and August 22 but claimed he only stole £1,200.", + "length": 85 + }, + { + "text": "'We all feel very disgusted at what Paul did to Ken especially as Ken trusted him so much.", + "length": 90 + }, + { + "text": "' After the hearing Clifford Heaton, secretary for the Rosegrove Unity Working Men's Club, said.", + "length": 96 + }, + { + "text": "Today as Youngs was jailed for 12 weeks, Mr Dawson said he had 'paid the price' for his good deed.", + "length": 98 + }, + { + "text": "' World War II hero Ken Dawson salutes the grave of a fallen comrade near his home in Burnley, Lancs .", + "length": 102 + }, + { + "text": "Victim Ken Dawson, pictured with his regiment in 1942, was swindled out of his savings by callous Paul Youngs .", + "length": 111 + }, + { + "text": "As the oldest veteran at his local Royal British Legion he would lead the Remembrance parade in Burnley each year.", + "length": 114 + }, + { + "text": "' But sentencing JP Stephen Riley told Youngs: 'What you did involved a breach of trust to an extremely high degree.", + "length": 116 + }, + { + "text": "'He is extremely sorry and extremely ashamed of himself, more particularly for the impact this has had upon Mr Dawson.", + "length": 
118 + }, + { + "text": "'We used to go shopping every Saturday afternoon and I would give him my card to get out cash for me to cover the week.", + "length": 119 + }, + { + "text": "'Needless to say Paul is barred for life - but I don't think even he would come back anyway because he would be too ashamed.", + "length": 124 + }, + { + "text": "War hero: Former rifleman Ken Dawson pictured in his army days was targeted by the bingo caller he treated as an old comrade.", + "length": 125 + }, + { + "text": "'I had put all my trust in him fought his corner and I thought he was my friend - yet he betrayed me in a most dastardly way.", + "length": 125 + }, + { + "text": "I used to take him fishing every Tuesday and after we had been shopping I would take him for a meal once or twice in the week.", + "length": 126 + }, + { + "text": "Former rifleman Ken Dawson pictured with fellow veterans at the Rosegrove Unity Working Men's Club where he met jobless Paul Youngs .", + "length": 133 + }, + { + "text": "By the time the thefts were discovered, when Mr Dawson checked his bank statements, he had lost up to £1,800 from his savings account.", + "length": 135 + }, + { + "text": "'I had trouble getting out of the car so I'd give him my card and number to get £150 but he was getting double, £150 for me and £150 for him.", + "length": 144 + }, + { + "text": "Youngs, who at the time worked at a care home and lived with his late mother, joined the committee and became a caller at the club's bingo nights.", + "length": 146 + }, + { + "text": "Mr Dawson, a retired welder, added: 'Paul had been barred from the club but I wanted to stick up for him because it seems like the right the thing to do.", + "length": 153 + }, + { + "text": "Callous Youngs repaid the old man by using his bank card to withdraw £150 every week from his account over a four month period, which he spent on himself.", + "length": 155 + }, + { + "text": "On occasions Youngs would pay him back in full but on others, if 
he claimed to be struggling financially, the victim told him to leave it or forget about it.", + "length": 157 + }, + { + "text": "Decorated war hero Ken Dawson, 89,  who is pictured wearing his treasured medals, was tricked out of his life savings by  jobless bingo caller Paul Youngs .", + "length": 158 + }, + { + "text": "A kind hearted war hero was tricked out of his life savings in a 'dastardly betrayal' by a jobless bingo caller he treated like an old comrade, a court heard.", + "length": 158 + }, + { + "text": "' The old man was 17 when he joined the First Battalion Cameronians Scottish Rifles and had served in India, Burma and Singapore winning five campaign medals.", + "length": 158 + }, + { + "text": "'I lay awake at night now and have trouble getting to sleep because I keep going through it all in my head and wonder how it happened, and why he did that to me.", + "length": 161 + }, + { + "text": "Burnley Magistrates Court was told the pair had met nine years ago at Rosegrove Unity Working Men's Club where Mr Dawson had been a committee member for 30 years.", + "length": 162 + }, + { + "text": "We have spoken about putting a charity show on for Ken in the New Year and any proceeds to go to him and if he wants he can put the proceeds to the British Legion.", + "length": 163 + }, + { + "text": "Trusting veteran Ken Dawson, 89, befriended Paul Youngs even though the 53-year-old was barred from their local social club where they both served on the committee.", + "length": 165 + }, + { + "text": "When he was quizzed by police, Youngs, of Stansfield Street, Burnley,said he had been out of work, was struggling with bills and the money was to keep him on the ' straight and narrow.", + "length": 184 + }, + { + "text": "'Unfortunately when I got my bank statements I would put them in the drawer and it was only when I came to look at them that I realised what had happened and I felt sick to the stomach.", + "length": 185 + }, + { + "text": "'I just thought everyone 
deserves a second chance and unfortunately I've found out the hard way that a leopard never changes its spots,' said the great grandfather from Burnley, Lancashire.", + "length": 189 + }, + { + "text": "'The army makes you realise and see things, you learn trust and respect - but unfortunately I put trust in this man like I would have done one of my old comrades and sadly I've paid the price.", + "length": 192 + }, + { + "text": "The decorated former Rifleman who fought the Japanese in Burma wanted to give Youngs a second chance and helped him pay for a car, buy meals, and only asked in return for his assistance with the weekly shop.", + "length": 207 + }, + { + "text": "'When we first started going to the hole in the wall I could see what he was doing because he parked right against it - but then all of a sudden after a few weeks he started going round the other side of the building and he was taking longer to withdraw the money.", + "length": 264 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7633633315563202 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:51.70667592Z", + "first_section_created": "2025-12-23T09:35:51.706974532Z", + "last_section_published": "2025-12-23T09:35:51.707389848Z", + "all_results_received": "2025-12-23T09:35:51.813303739Z", + "output_generated": "2025-12-23T09:35:51.813529648Z", + "total_processing_time_ms": 106, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 105, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:51.706974532Z", + "publish_time": "2025-12-23T09:35:51.707297944Z", + "first_worker_start": "2025-12-23T09:35:51.707785663Z", + "last_worker_end": "2025-12-23T09:35:51.812505Z", + "total_journey_time_ms": 105, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:51.707911168Z", + "start_time": "2025-12-23T09:35:51.708148777Z", 
+ "end_time": "2025-12-23T09:35:51.708278282Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:51.708106Z", + "start_time": "2025-12-23T09:35:51.708282Z", + "end_time": "2025-12-23T09:35:51.812505Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 104 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:51.70770036Z", + "start_time": "2025-12-23T09:35:51.707785663Z", + "end_time": "2025-12-23T09:35:51.707893067Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:51.707814964Z", + "start_time": "2025-12-23T09:35:51.707867466Z", + "end_time": "2025-12-23T09:35:51.707909068Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:51.707334246Z", + "publish_time": "2025-12-23T09:35:51.707389848Z", + "first_worker_start": "2025-12-23T09:35:51.707869366Z", + "last_worker_end": "2025-12-23T09:35:51.807526Z", + "total_journey_time_ms": 100, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:51.707897467Z", + "start_time": "2025-12-23T09:35:51.707941269Z", + "end_time": "2025-12-23T09:35:51.70796447Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:51.708133Z", + "start_time": "2025-12-23T09:35:51.708279Z", + "end_time": "2025-12-23T09:35:51.807526Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 99 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:51.707937169Z", + "start_time": "2025-12-23T09:35:51.70796547Z", + "end_time": "2025-12-23T09:35:51.707994971Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": 
"2025-12-23T09:35:51.707819764Z", + "start_time": "2025-12-23T09:35:51.707869366Z", + "end_time": "2025-12-23T09:35:51.707880767Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 203, + "min_processing_ms": 99, + "max_processing_ms": 104, + "avg_processing_ms": 101, + "median_processing_ms": 104, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3010, + "slowest_section_id": 0, + "slowest_section_time_ms": 105 + } +} diff --git a/data/output/0067e49198547ff999fe3ea5a2b202b374564424.json b/data/output/0067e49198547ff999fe3ea5a2b202b374564424.json new file mode 100644 index 0000000..cc8b464 --- /dev/null +++ b/data/output/0067e49198547ff999fe3ea5a2b202b374564424.json @@ -0,0 +1,488 @@ +{ + "file_name": "0067e49198547ff999fe3ea5a2b202b374564424.txt", + "total_words": 1179, + "top_n_words": [ + { + "word": "the", + "count": 53 + }, + { + "word": "and", + "count": 46 + }, + { + "word": "to", + "count": 30 + }, + { + "word": "her", + "count": 26 + }, + { + "word": "a", + "count": 23 + }, + { + "word": 
"in", + "count": 23 + }, + { + "word": "he", + "count": 20 + }, + { + "word": "taylor", + "count": 19 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "s", + "count": 17 + } + ], + "sorted_sentences": [ + { + "text": "and Mrs.", + "length": 8 + }, + { + "text": "Visit site .", + "length": 12 + }, + { + "text": "Visit site .", + "length": 12 + }, + { + "text": "Visit site .", + "length": 12 + }, + { + "text": "Visit site .", + "length": 12 + }, + { + "text": "Visit site .", + "length": 12 + }, + { + "text": "Sometimes old habits die hard.", + "length": 30 + }, + { + "text": "'These two are really in love.", + "length": 30 + }, + { + "text": "' says a friend of the couple.", + "length": 30 + }, + { + "text": "Kelly Severide on Chicago Fire.", + "length": 31 + }, + { + "text": "Alexandre Vauthier Custom Ensemble .", + "length": 36 + }, + { + "text": "Missguided Monochrome Cropped Blazer .", + "length": 38 + }, + { + "text": "DKNY Two-tone Crepe Blazer at The Outnet .", + "length": 42 + }, + { + "text": "'And they want to have children right away.", + "length": 43 + }, + { + "text": "The couple got engaged on Valentine's Day .", + "length": 43 + }, + { + "text": "'Taylor got down on one knee in front of Mr.", + "length": 44 + }, + { + "text": "What does he have that her other guys didn't?", + "length": 45 + }, + { + "text": "But she never stopped hoping it would happen.", + "length": 45 + }, + { + "text": "'Stefani never thought she'd find lasting love.", + "length": 47 + }, + { + "text": "Shop the current collection at Luisa Via Roma .", + "length": 47 + }, + { + "text": "Want this designer look on a high street budget?", + "length": 48 + }, + { + "text": "Nell Two-Tone Buttoned 'Kara' Blazer at Bluefly .", + "length": 49 + }, + { + "text": "She knew then and there they were going to hook up.", + "length": 51 + }, + { + "text": "And they are thrilled over their daughter's engagement.", + "length": 55 + }, + { + "text": "Eyes wide shut: Taylor plants a smooch 
on his lady love.", + "length": 56 + }, + { + "text": "We've seen the transformation right before our very eyes.", + "length": 57 + }, + { + "text": "Shop our edit of monochrome blazers below starting at just $60.", + "length": 63 + }, + { + "text": "Alice + Olivia Oliver Draped-Lapel Blazer at Saks Fifth Avenue .", + "length": 64 + }, + { + "text": "Then, on Valentine's Day he returned to the restaurant to purpose.", + "length": 66 + }, + { + "text": "That way, when they come back together, there are always fireworks.", + "length": 67 + }, + { + "text": "' And a source says the happy couple can't wait to marry and have kids.", + "length": 71 + }, + { + "text": "Who would have imagined that scene might actually play out in real life?", + "length": 72 + }, + { + "text": "Germanotta to ask for their daughter's hand in marriage,' says the source.", + "length": 74 + }, + { + "text": "'Taylor and Gaga are not only lovers, they are best friends,' says the pal.", + "length": 75 + }, + { + "text": "'Taylor and Stefani are planning to marry later this year,' says the source.", + "length": 76 + }, + { + "text": "'And he encouraged her to heal herself through exercise, yoga and meditation.", + "length": 77 + }, + { + "text": "' The source says Gaga's parents, Joe and Cindy Germanotta, also adore Taylor.", + "length": 78 + }, + { + "text": "' According to the friend, the singer was totally taken by Taylor, who plays Lt.", + "length": 80 + }, + { + "text": "She also loves the fact he fell in love with Stefani, not her famous Gaga persona.", + "length": 82 + }, + { + "text": "He won his role as Lieutenant Kelly Severide in Chicago fire the following year .", + "length": 82 + }, + { + "text": "But she was surprised when he planted an unscripted kiss on her lips during the shoot.", + "length": 86 + }, + { + "text": "It was on the set of the first video in July 2011 that Gaga first met the hunky actor .", + "length": 87 + }, + { + "text": "They would love to have at least three 
kids, because they both come from large families.", + "length": 88 + }, + { + "text": "Well, take a look for yourself in these Daily Mail Online exclusive modeling photos taken in 2011.", + "length": 98 + }, + { + "text": "'There was a steamy scene between them, and Taylor just reached over and kissed her,' says the friend.", + "length": 102 + }, + { + "text": "' Strike a pose: Taylor Kinney did this modeling session while he was co-staring in The Vampire Diaries.", + "length": 104 + }, + { + "text": "Life follows art: Lady Gaga  portrayed a bride all in white while Kinney wears a white tuxedo forYou and I.", + "length": 108 + }, + { + "text": "But leave it to the newly engaged crooner to spice things up with a thigh-high side slit and fishnet stockings.", + "length": 111 + }, + { + "text": "Draped in a black and white blazer with a plunging neckline and sheer skirt, the neutral palette was pretty tame.", + "length": 113 + }, + { + "text": "'They understand they are on two separate journeys and they allow each other the space to do what they have to do.", + "length": 114 + }, + { + "text": "Back in September the Born This Way singer wore a custom Alexandre Vauthier ensemble to an event for Harper's Bazaar.", + "length": 117 + }, + { + "text": "Gaga's couture creation is not available to the masses, but you can score other items from the brand at Luisa Via Roma.", + "length": 119 + }, + { + "text": "Gaga, 28, was instantly attracted to 33-year-old Kinney's dark good looks and blue eyes when they met on location in Nebraska.", + "length": 126 + }, + { + "text": "Somewhere under the bizarre outfits, strange makeup and wild wigs there was a style star waiting to emerge and now she has arrived.", + "length": 131 + }, + { + "text": "Like aprayer: Gaga attended a service at the Grandview United Methodist Church in his hometown in Pennsylvania with Taylor in December .", + "length": 136 + }, + { + "text": "It looks like finding love and settling down with her hunky 
hubby-to-be has affected Lady Gaga's taste in fashion and we like what we see.", + "length": 138 + }, + { + "text": "'But take one look at Taylor and you know right away that Gaga didn't do too badly either when it comes to finding love,' the source added.", + "length": 139 + }, + { + "text": "Taylor further endeared himself to his lady love in early 2013 when she broke her right hip and suffered a labral tear that required surgery.", + "length": 141 + }, + { + "text": "'Joe and Cindy were thrilled to be part of the proposal and Joe even brought out a few bottles of Dom Perignon Champagne to toast the occasion.", + "length": 143 + }, + { + "text": "And they've found a way to make their relationship work despite the fact he's often stuck in Chicago shooting 'Fire,' and she's out on the road.", + "length": 144 + }, + { + "text": "of her folks, and a few close friends, Taylor announced it was his intention to marry Stefani, and that he wanted her parents to know he was serious.", + "length": 149 + }, + { + "text": "She also appreciates his great sense of humor and she loves how he's super sexy, while still being so sensitive that he cries when she sings love songs.", + "length": 152 + }, + { + "text": "After a string of failed romances, Stefani Germanotta – aka Lady Gaga - is finally engaged to the man of her dreams - Chicago Fire star Taylor Kinney.", + "length": 152 + }, + { + "text": "' Super rich Stefani – who according to Forbes Magazine earned $33 million over the past eight months –  is obviously a real catch for her actor beau.", + "length": 155 + }, + { + "text": "'When the director yelled, 'cut,' she asked him why he'd kissed her, since it wasn't part of the script, and he just gave her this sexy smile and walked off.", + "length": 157 + }, + { + "text": "' Sensitive: Taylor posed for these sexy snaps and a friend tells Daily Mail Online that it's not only his sex apeal but his sentimental heart that won Gaga over .", + "length": 163 + }, + { + "text": 
"'During the six months it took Stefani to recover, he was constantly by her side, pushing her around in the wheel chair and waiting on her hand and foot,' says another source.", + "length": 175 + }, + { + "text": "Bethrothed: Gaga proudly showed off her stunning heart-shaped diamond engagement ring on Monday after accepting her boyfriend Taylor Kinney's marriage proposal on Valentine's Day .", + "length": 180 + }, + { + "text": "'When she met Taylor on the set of her 2011 music video, You and I, he was cast as the mad scientist opposite her bizarre mermaid character and by the end of the six minute video – the two were shown getting married.", + "length": 218 + }, + { + "text": "When Gaga met Taylor on the set of her 2011 music video, You and I, he was cast as the mad scientist opposite her bizarre mermaid character and by the end of the six minute video – the two were shown getting married .", + "length": 219 + }, + { + "text": "'He is totally supportive of her career, her bizarre way of dressing and her freaky performances, and she loves the fact he's an adventurer who likes to surf, paddleboard, hike in the mountains, sky dive and ride fast motorcycles.", + "length": 230 + }, + { + "text": "'Last fall, after three years of togetherness, Joe asked Taylor what his intentions towards his daughter were and the following weekend, Taylor booked a table at her parent's New York restaurant, Joanne Trattoria, and showed them he meant business.", + "length": 248 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4574814438819885 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:52.206554832Z", + "first_section_created": "2025-12-23T09:35:52.206894445Z", + "last_section_published": "2025-12-23T09:35:52.207429166Z", + "all_results_received": "2025-12-23T09:35:52.356499224Z", + "output_generated": "2025-12-23T09:35:52.356754734Z", + "total_processing_time_ms": 150, + "file_splitting_time_ms": 0, + 
"workers_processing_time_ms": 149, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:52.206894445Z", + "publish_time": "2025-12-23T09:35:52.20727196Z", + "first_worker_start": "2025-12-23T09:35:52.207611873Z", + "last_worker_end": "2025-12-23T09:35:52.355633Z", + "total_journey_time_ms": 148, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:52.208198896Z", + "start_time": "2025-12-23T09:35:52.208252898Z", + "end_time": "2025-12-23T09:35:52.208362002Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:52.208312Z", + "start_time": "2025-12-23T09:35:52.208463Z", + "end_time": "2025-12-23T09:35:52.355633Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 147 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:52.208204696Z", + "start_time": "2025-12-23T09:35:52.208294299Z", + "end_time": "2025-12-23T09:35:52.208424704Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:52.20754427Z", + "start_time": "2025-12-23T09:35:52.207611873Z", + "end_time": "2025-12-23T09:35:52.207652075Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:52.207339862Z", + "publish_time": "2025-12-23T09:35:52.207429166Z", + "first_worker_start": "2025-12-23T09:35:52.208016589Z", + "last_worker_end": "2025-12-23T09:35:52.298865Z", + "total_journey_time_ms": 91, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:52.208190695Z", + "start_time": "2025-12-23T09:35:52.208236197Z", + "end_time": "2025-12-23T09:35:52.208265898Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + 
"receive_time": "2025-12-23T09:35:52.208307Z", + "start_time": "2025-12-23T09:35:52.208464Z", + "end_time": "2025-12-23T09:35:52.298865Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 90 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:52.207958086Z", + "start_time": "2025-12-23T09:35:52.208016589Z", + "end_time": "2025-12-23T09:35:52.20806009Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:52.208264098Z", + "start_time": "2025-12-23T09:35:52.208293499Z", + "end_time": "2025-12-23T09:35:52.2083088Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 237, + "min_processing_ms": 90, + "max_processing_ms": 147, + "avg_processing_ms": 118, + "median_processing_ms": 147, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3289, + "slowest_section_id": 0, + "slowest_section_time_ms": 148 + } +} diff --git a/data/output/0067f67951e7fcf49b83de3544ef53f8d8d34c85.json 
b/data/output/0067f67951e7fcf49b83de3544ef53f8d8d34c85.json new file mode 100644 index 0000000..0e61397 --- /dev/null +++ b/data/output/0067f67951e7fcf49b83de3544ef53f8d8d34c85.json @@ -0,0 +1,298 @@ +{ + "file_name": "0067f67951e7fcf49b83de3544ef53f8d8d34c85.txt", + "total_words": 621, + "top_n_words": [ + { + "word": "the", + "count": 49 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "in", + "count": 20 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "elephants", + "count": 10 + }, + { + "word": "poachers", + "count": 9 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "central", + "count": 8 + }, + { + "word": "said", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "\" he said.", + "length": 10 + }, + { + "text": "3 million.", + "length": 10 + }, + { + "text": "A kilogram (2.", + "length": 14 + }, + { + "text": "\" Robert Jackson, the U.", + "length": 24 + }, + { + "text": "\"The plan is a good one.", + "length": 24 + }, + { + "text": "But execution is now critical.", + "length": 30 + }, + { + "text": "The operation is estimated to cost about $2.", + "length": 44 + }, + { + "text": "CNN's Jim Kavanagh contributed to this report.", + "length": 46 + }, + { + "text": "ambassador to Cameroon, said he was pleased with the meeting.", + "length": 61 + }, + { + "text": "It is feared they will be completely wiped out within seven years.", + "length": 66 + }, + { + "text": "The toll could reach 50 after a thorough search is made, Mbarga added.", + "length": 70 + }, + { + "text": "Officials estimated that 1,700 forest elephants remain in the two Cameroonian parks.", + "length": 84 + }, + { + "text": "The announcement called on other nations to contribute additional funds to sustain the effort.", + "length": 94 + }, + { + "text": "Political analysts say the proceeds fund rebel groups in Sudan and the Central African Republic.", + "length": 96 + }, + { + 
"text": "The same poachers were believed to be responsible for hundreds of elephant deaths over the past year.", + "length": 101 + }, + { + "text": "Very young -- even newborn -- elephants were among the carcasses found in Nki and Lobeke national parks.", + "length": 104 + }, + { + "text": "On the night of March 14 to 15, poachers slaughtered killed at least 89 elephants in southern Chad, WWF said.", + "length": 109 + }, + { + "text": "2 pounds) of their ivory sells for hundreds of dollars on the underground market in places such as China and Thailand.", + "length": 118 + }, + { + "text": "Forest elephants are distinguished from the more familiar savanna elephants by their smaller size and straighter tusks.", + "length": 119 + }, + { + "text": "They are also believed to be behind the killing of at least 30 elephants in the Central African Republic since January 1.", + "length": 121 + }, + { + "text": "Evidence indicates that a horseback-riding band of about 300 poachers from Sudan was behind the slaughter, officials said.", + "length": 122 + }, + { + "text": "The poachers on horseback are also suspected of killing 300 elephants in Cameroon's Bouba N'Djida National Park in early 2012.", + "length": 126 + }, + { + "text": "The carnage prompted Cameroon to mobilize 600 elite troops to try to keep the poachers from crossing the border again, WWF reported.", + "length": 132 + }, + { + "text": "The communique was issued at the end of a three-day emergency anti-poaching ministerial conference held in Cameroon's capital, Yaoundé.", + "length": 138 + }, + { + "text": "\"The carcasses are still fresh, indicating the killings took place probably only this month,\" ecologist Theophile Mbarga told CNN on Tuesday.", + "length": 141 + }, + { + "text": "In the statement, the ECCAS states congratulated Thailand for its March 3 decision to ban its legal domestic ivory trade and urged its vigorous enforcement.", + "length": 156 + }, + { + "text": "A recent peer-reviewed study 
published at PLOS One documented a \"catastrophic\" 62 percent decline in Central Africa's forest elephant population over nine years.", + "length": 161 + }, + { + "text": "\"I am, however, concerned that there is no mention of corruption in the statement, because it contributes directly to the poaching and trafficking problem,\" he said.", + "length": 165 + }, + { + "text": "Ivory consumers \"need to be sensitized to the consequences\" of the ivory trade, the statement said, adding that \"destination countries (should) adopt measures to reduce ivory demand.", + "length": 182 + }, + { + "text": "\"We recommend the mobilization of all defense and security forces in the affected countries\" to stop the poachers, members of the Economic Community of Central African States said in a joint statement.", + "length": 201 + }, + { + "text": "The dead elephants were found closely clustered -- less than 35 feet apart -- indicating the poachers used powerful, modern weapons, conservation group WWF project manager Zacharie Nzooh told journalists Tuesday.", + "length": 212 + }, + { + "text": "Savanna elephant populations in the Central African Republic are believed to have plummeted from around 80,000 just 30 years ago to a few hundred today, according to WWF, formerly known as the World Wildlife Fund.", + "length": 213 + }, + { + "text": "Yaounde, Cameroon (CNN) -- Heavily armed poachers recently killed nearly 40 endangered forest elephants for their ivory in two national parks, officials in Cameroon said Tuesday, the latest in a string of slaughters of the animals in Central Africa.", + "length": 249 + }, + { + "text": "The governments of three Central African nations -- Cameroon, the Central African Republic and Chad -- announced Saturday they would muster as many as 1,000 soldiers for joint military operations to protect the region's last remaining savanna elephants, as the Sudanese poachers are still active in the region.", + "length": 310 + } + ], + "sentiment": { + 
"sentiment": "negative", + "score": 0.7654832601547241 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:52.70767149Z", + "first_section_created": "2025-12-23T09:35:52.7079294Z", + "last_section_published": "2025-12-23T09:35:52.708104307Z", + "all_results_received": "2025-12-23T09:35:52.775391506Z", + "output_generated": "2025-12-23T09:35:52.775568813Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:52.7079294Z", + "publish_time": "2025-12-23T09:35:52.708104307Z", + "first_worker_start": "2025-12-23T09:35:52.708664628Z", + "last_worker_end": "2025-12-23T09:35:52.77451Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:52.708776033Z", + "start_time": "2025-12-23T09:35:52.708836535Z", + "end_time": "2025-12-23T09:35:52.708903738Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:52.709046Z", + "start_time": "2025-12-23T09:35:52.709186Z", + "end_time": "2025-12-23T09:35:52.77451Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:52.708736731Z", + "start_time": "2025-12-23T09:35:52.708808234Z", + "end_time": "2025-12-23T09:35:52.708902538Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:52.708598926Z", + "start_time": "2025-12-23T09:35:52.708664628Z", + "end_time": "2025-12-23T09:35:52.70869553Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3884, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/00684270ad422f73be59de7ea95858a000bf0dd9.json b/data/output/00684270ad422f73be59de7ea95858a000bf0dd9.json new file mode 100644 index 0000000..77f4bbb --- /dev/null +++ b/data/output/00684270ad422f73be59de7ea95858a000bf0dd9.json @@ -0,0 +1,262 @@ +{ + "file_name": "00684270ad422f73be59de7ea95858a000bf0dd9.txt", + "total_words": 542, + "top_n_words": [ + { + "word": "her", + "count": 22 + }, + { + "word": "the", + "count": 16 + }, + { + "word": "danczuk", + "count": 15 + }, + { + "word": "she", + "count": 15 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "mrs", + "count": 13 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "to", + "count": 11 + }, + { + "word": "in", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "I am telling the truth.", + "length": 23 + }, + { + "text": "I will NEVER forgive \u0026 NEVER forget!", + 
"length": 36 + }, + { + "text": "I was raped or sexually abused hundreds of times.", + "length": 49 + }, + { + "text": "'She did nothing to protect me and I will never forgive her.", + "length": 60 + }, + { + "text": "Sorry we are not currently accepting comments on this article.", + "length": 62 + }, + { + "text": "It is a very difficult thing to confront – as any paedophile victim will know.", + "length": 80 + }, + { + "text": "'We will do everything we can to support victims and bring offenders to justice.", + "length": 80 + }, + { + "text": "Mrs Danczuk claims she was regularly abused in the bedroom of her family home in Greater Manchester .", + "length": 101 + }, + { + "text": "In response to the arrest today, Mrs Danczuk said she would never forgive those who doubted her claims .", + "length": 104 + }, + { + "text": "' A man, 36, has been arrested in connection with sex abuse allegations made by MP's wife Karen Danczuk .", + "length": 105 + }, + { + "text": "' Mrs Danczuk, pictured with her Labour MP husband Simon, she felt 'more numb than angry' about the abuse .", + "length": 108 + }, + { + "text": "She told the Mail on Sunday her 'selfies' are a defiant way of saying: 'I'm sexy – and I'm in control now.", + "length": 108 + }, + { + "text": "' Asked why she had waited two decades to name her attacker, she said: 'Because I am still coming to terms with it.", + "length": 115 + }, + { + "text": "Today, she told the Mail on Sunday that she felt 'more numb than angry' and her rage is instead directed at her mother.", + "length": 119 + }, + { + "text": "A 36-year-old man arrested in connection with sex abuse allegations made by MP's wife Karen Danczuk has been released on bail.", + "length": 126 + }, + { + "text": "Her father Martin Burke, now 73, moved out, leaving Mrs Danczuk, three older brothers and an older sister with their mother Susan Burke.", + "length": 136 + }, + { + "text": "After news of the arrest, Mrs Danczuk tweeted: 'For all those who doubted me, 
publicly slated me \u0026 made me feel like scum for coming out.", + "length": 137 + }, + { + "text": "The man, who lives a few miles from Mrs Danczuk, was arrested by Greater Manchester Police today and he has now been bailed until April 30.", + "length": 139 + }, + { + "text": "Mrs Danczuk's claims were branded a 'figment of her imagination' by her mother, as well as two of her four siblings who challenged her story.", + "length": 141 + }, + { + "text": "The family feud erupted this week as Mrs Danczuk publicly claimed her cleavage-baring selfies were rooted in the abuse she suffered as a child.", + "length": 143 + }, + { + "text": "She said she was regularly abused in the bedroom of her family home in Middleton, Greater Manchester, shortly after her parents' marriage broke up.", + "length": 147 + }, + { + "text": "Mrs Danczuk's claims were branded a 'figment of her imagination' by her mother Susan Burke (pictured) She told the newspaper: 'I am not a fantasist.", + "length": 148 + }, + { + "text": "' But Mrs Burke, 58, called her daughter an 'attention-seeker' which led Mrs Danczuk to report her claims to the police 'as a matter of urgency, to prove I am telling the truth'.", + "length": 178 + }, + { + "text": "Mrs Danczuk, who has became well known for Twitter selfies showing off her ample cleavage, claims the abuse took place between 1989 and 1995 when she was aged between six and 12.", + "length": 178 + }, + { + "text": "Mrs Danczuk, 31, who is the wife of anti-child sex abuse MP Simon Danczuk, claims she was raped and sexually assaulted 'hundreds of times' as a child at her family home in Manchester.", + "length": 183 + }, + { + "text": "' DI Caroline Ward of Greater Manchester Police said: 'We work hard to support victims of sexual offences and thoroughly investigate all reports of sexual abuse and rape no matter how long ago they happened.", + "length": 207 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5871355533599854 + }, + 
"name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:53.208862151Z", + "first_section_created": "2025-12-23T09:35:53.209130261Z", + "last_section_published": "2025-12-23T09:35:53.209308368Z", + "all_results_received": "2025-12-23T09:35:53.275784136Z", + "output_generated": "2025-12-23T09:35:53.275918341Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:53.209130261Z", + "publish_time": "2025-12-23T09:35:53.209308368Z", + "first_worker_start": "2025-12-23T09:35:53.209921692Z", + "last_worker_end": "2025-12-23T09:35:53.273087Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:53.209911491Z", + "start_time": "2025-12-23T09:35:53.209979994Z", + "end_time": "2025-12-23T09:35:53.210033196Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:53.210148Z", + "start_time": "2025-12-23T09:35:53.210286Z", + "end_time": "2025-12-23T09:35:53.273087Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:53.209908691Z", + "start_time": "2025-12-23T09:35:53.209991695Z", + "end_time": "2025-12-23T09:35:53.210076798Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:53.209826888Z", + "start_time": "2025-12-23T09:35:53.209921692Z", + "end_time": "2025-12-23T09:35:53.209973394Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3033, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/006870cb52c924abaadc261212a7c48e802e1250.json b/data/output/006870cb52c924abaadc261212a7c48e802e1250.json new file mode 100644 index 0000000..5a97130 --- /dev/null +++ b/data/output/006870cb52c924abaadc261212a7c48e802e1250.json @@ -0,0 +1,362 @@ +{ + "file_name": "006870cb52c924abaadc261212a7c48e802e1250.txt", + "total_words": 492, + "top_n_words": [ + { + "word": "the", + "count": 32 + }, + { + "word": "of", + "count": 22 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "ronaldo", + "count": 15 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "courtesy", + "count": 10 + }, + { + "word": "s", + "count": 10 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "and", + "count": 6 + }, + { + "word": "is", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "Roar .", + "length": 6 + }, + { + "text": "Ronaldo.", + "length": 8 + }, + { + "text": "ROOAAAAR.", + "length": 9 + }, + { + "text": "Shake It Off .", + 
"length": 14 + }, + { + "text": "The Original .", + "length": 14 + }, + { + "text": "Richard Keys .", + "length": 14 + }, + { + "text": "Usher - Yeah .", + "length": 14 + }, + { + "text": "Screaming Goat .", + "length": 16 + }, + { + "text": "' Katy Perry, ft.", + "length": 17 + }, + { + "text": "Two and a Half Men .", + "length": 20 + }, + { + "text": "Dead Poets Society .", + "length": 20 + }, + { + "text": "Courtesy of: Duncan.", + "length": 20 + }, + { + "text": "He said: 'The scream?", + "length": 21 + }, + { + "text": "Another musical entry.", + "length": 22 + }, + { + "text": "Courtesy of: Jamie H .", + "length": 22 + }, + { + "text": "Step forward, Ronaldo.", + "length": 22 + }, + { + "text": "Courtesy of: Barzaboy .", + "length": 23 + }, + { + "text": "I Will Always Love You .", + "length": 24 + }, + { + "text": "Ronaldo joins the chorus.", + "length": 25 + }, + { + "text": "Courtesy of: Mike Cripps .", + "length": 26 + }, + { + "text": "Courtesy of: Simply Spurs .", + "length": 27 + }, + { + "text": "Courtesy of: vonstrenginho .", + "length": 28 + }, + { + "text": "Courtesy of: vonstrenginho .", + "length": 28 + }, + { + "text": "Courtesy of: Vello Publico .", + "length": 28 + }, + { + "text": "Of course, Twitter exploded.", + "length": 28 + }, + { + "text": "Courtesy of: Elliot Wagland .", + "length": 29 + }, + { + "text": "No further explanation needed.", + "length": 30 + }, + { + "text": "'And you're going to hear me...", + "length": 31 + }, + { + "text": "' Courtesy of: Rhys Wynne-Lewis .", + "length": 33 + }, + { + "text": "Now this one is truly great banter.", + "length": 35 + }, + { + "text": "Indeed, Ronaldo's name was trending long into the night.", + "length": 56 + }, + { + "text": "Taylor Swift's hit song Shake It Off is first up in our list of Vines.", + "length": 70 + }, + { + "text": "'I would like you to give a demonstration of a barbaric yob,' says Williams.", + "length": 76 + }, + { + "text": "' But that didn't stop parody videos of the 
moment spreading across the globe.", + "length": 78 + }, + { + "text": "Here, Sportsmail have compiled the best of the bunch for your viewing pleasure.", + "length": 79 + }, + { + "text": "One particular moment though, caught the imagination of Twitter users around the world.", + "length": 87 + }, + { + "text": "Similar to the first, Ronaldo's voice is this time placed over the top of an all-time classic.", + "length": 94 + }, + { + "text": "According to former Sky Sports presenter Richard Keys, Ronaldo's scream is 'extraordinary, Des.", + "length": 95 + }, + { + "text": "This one has taken more effort, and Ronaldo's 'YOOO' is dubbed over Usher's voice numerous times.", + "length": 97 + }, + { + "text": "In this Vine, Ronaldo's scream has been likened to the popular YouTube video of a screaming goat.", + "length": 97 + }, + { + "text": "The players know I always do that shout when I score a goal or when we win - it's our team shout.", + "length": 97 + }, + { + "text": "The Real Madrid star's shout perhaps looks less out of context after he has scored on a football pitch .", + "length": 104 + }, + { + "text": "As Whitney Houston lets loose on the chorus to timeless tune, 'I Will Always Love You', in comes Ronaldo.", + "length": 105 + }, + { + "text": "Cristiano Ronaldo celebrates winning the Ballon d'Or for the second consecutive year with a loud scream .", + "length": 105 + }, + { + "text": "Huge events like this gain much media attention, and users on social media were tuned in throughout the evening.", + "length": 112 + }, + { + "text": "In a tribute to the late, great Robin Williams, Ronaldo takes part in a scene from the film, 'Dead Poets Society.", + "length": 113 + }, + { + "text": "Ronaldo later commented to Spanish television station Cuarto that the noise was part of a Real Madrid celebration.", + "length": 114 + }, + { + "text": "Cristiano Ronaldo picked up his third Ballon d'Or trophy on Monday night, beating Lionel Messi for the second year in a row.", + 
"length": 124 + }, + { + "text": "Crank That (Soulja Boy) In a throwback to the popular 2007 hit 'Crank That', Ronaldo's voice is placed over part of the lyrics.", + "length": 127 + }, + { + "text": "In this Vine, 'Two and a Half Men' become 'Three and a Half Men', when Ronaldo pops up to help the regular cast sing the theme tune.", + "length": 132 + }, + { + "text": "As the Real Madrid maestro stepped up to the podium to collect his award, he let out a deep scream into the microphone, to the surprise of the watching millions.", + "length": 161 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.5327807664871216 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:53.710088313Z", + "first_section_created": "2025-12-23T09:35:53.710443127Z", + "last_section_published": "2025-12-23T09:35:53.710622034Z", + "all_results_received": "2025-12-23T09:35:53.771914001Z", + "output_generated": "2025-12-23T09:35:53.772061207Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:53.710443127Z", + "publish_time": "2025-12-23T09:35:53.710622034Z", + "first_worker_start": "2025-12-23T09:35:53.711157954Z", + "last_worker_end": "2025-12-23T09:35:53.77106Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:53.711270359Z", + "start_time": "2025-12-23T09:35:53.711343462Z", + "end_time": "2025-12-23T09:35:53.711408164Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:53.711452Z", + "start_time": "2025-12-23T09:35:53.711592Z", + "end_time": "2025-12-23T09:35:53.77106Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": 
"2025-12-23T09:35:53.711245258Z", + "start_time": "2025-12-23T09:35:53.71131046Z", + "end_time": "2025-12-23T09:35:53.711376463Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:53.711107252Z", + "start_time": "2025-12-23T09:35:53.711157954Z", + "end_time": "2025-12-23T09:35:53.711182155Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2824, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/00689cfc2cd30b752ae603d0e6ce82ee2f7e7042.json b/data/output/00689cfc2cd30b752ae603d0e6ce82ee2f7e7042.json new file mode 100644 index 0000000..878c472 --- /dev/null +++ b/data/output/00689cfc2cd30b752ae603d0e6ce82ee2f7e7042.json @@ -0,0 +1,448 @@ +{ + "file_name": "00689cfc2cd30b752ae603d0e6ce82ee2f7e7042.txt", + 
"total_words": 1213, + "top_n_words": [ + { + "word": "the", + "count": 56 + }, + { + "word": "to", + "count": 39 + }, + { + "word": "a", + "count": 34 + }, + { + "word": "in", + "count": 30 + }, + { + "word": "his", + "count": 22 + }, + { + "word": "he", + "count": 20 + }, + { + "word": "that", + "count": 19 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "for", + "count": 13 + }, + { + "word": "mayweather", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "99.", + "length": 3 + }, + { + "text": "Amberley, £12.", + "length": 15 + }, + { + "text": "*Muhammad Ali – Tyneside 1977.", + "length": 32 + }, + { + "text": "Maidana, then ignored the result.", + "length": 33 + }, + { + "text": "Mayweather, having instigated his .", + "length": 35 + }, + { + "text": "If so, it should be an Irish cracker.", + "length": 37 + }, + { + "text": "Khan is looking for a marquee win in America.", + "length": 45 + }, + { + "text": "‘I don’t duck anyone,’ says Mayweather.", + "length": 45 + }, + { + "text": "After all, one promise had already been broken.", + "length": 47 + }, + { + "text": "Yet of late it has become a crime to lose a fight.", + "length": 50 + }, + { + "text": "The Cinammon Kid made a tasty comeback at the weekend.", + "length": 54 + }, + { + "text": "Expect Manny Pacquiao and Amir Khan to do the same, shortly.", + "length": 60 + }, + { + "text": "But we are approaching the point where he may have to prove it.", + "length": 63 + }, + { + "text": "Twitter poll asking fans to vote whether he should fight Khan or .", + "length": 66 + }, + { + "text": "Personally, since he is the best in the world and knows it, I believe him.", + "length": 74 + }, + { + "text": "Most champions are defined by how they come back from losing their titles.", + "length": 74 + }, + { + "text": "Perhaps as early as this week, he will name a comeback opponent and a date.", + "length": 75 + }, + { + "text": "Less acceptable was Canelo’s failure to make the light-middleweight 
limit.", + "length": 76 + }, + { + "text": "What matters is how he responds to that setback when he returns to the ring.", + "length": 76 + }, + { + "text": "Nevertheless, Canelo made a statement as important for boxing as for himself.", + "length": 77 + }, + { + "text": "Does what he wants: Mayweather ignored his own poll about who to fight next .", + "length": 77 + }, + { + "text": "Geordie nation: A young admirer gives Ali a kiss on his arrival in Newcastle .", + "length": 78 + }, + { + "text": "Warrior: Maidana pushed Khan all the way when they fought in Las Vegas in 2010 .", + "length": 80 + }, + { + "text": "Mayweather, meanwhile, continues to exercise his box-office power over his sport.", + "length": 81 + }, + { + "text": "now to world title contention, has dished up a powerful defence against that nonsense.", + "length": 86 + }, + { + "text": "Back with a bang: Saul 'Canelo' Alvarez beat Alfredo Angulo on Saturday in Las Vegas .", + "length": 86 + }, + { + "text": "Ovation: Former world heavyweight champion Muhammad Ali touches down in Newcastle in 1977 .", + "length": 91 + }, + { + "text": "Response: Alvarez bounced back from his disappointing loss to Mayweather by beating Angulo .", + "length": 92 + }, + { + "text": "Announcement: Floyd Mayweather will fight Marcos Maidana on May 3 at MGM Grand in Las Vegas .", + "length": 93 + }, + { + "text": "So Khan, having lost almost a year waiting for God in gloves, will go his own way this spring.", + "length": 94 + }, + { + "text": "Different paths: Khan rejected the chance to fight on Adrien Broner on Mayweather's undercard .", + "length": 95 + }, + { + "text": "The consolation for Angulo was that he picked up an extra $100,000 deducted from the Alvarez purse.", + "length": 99 + }, + { + "text": "Rather than dehydrate to make the weight, shouldn’t they be moving up to a more natural division?", + "length": 99 + }, + { + "text": "But at some point, for credibility’s sake, he may have to let the public choose his 
opponent for him.", + "length": 103 + }, + { + "text": "In the pipeline: Amir Khan is looking for a marquee win in America, not a British showdown with Kell Brook .", + "length": 108 + }, + { + "text": "Too many boxers are hitting the scales too heavy these days and that in itself hints at another medical risk.", + "length": 109 + }, + { + "text": "Alfredo Angulo never got close to putting the bite on Canelo, despite a couple of quibbles nagging at the event.", + "length": 112 + }, + { + "text": "Much more recently, Routledge travelled from Newcastle to be a guest at Ali’s 70th birthday dinner in Louisville.", + "length": 115 + }, + { + "text": "That will not be Kell Brook in the UK, either, since that would not improve his chances of ever fighting Mayweather.", + "length": 116 + }, + { + "text": "That misconception keeps growing the longer Mayweather - 45-0 and counting - goes without suffering his first defeat.", + "length": 117 + }, + { + "text": "He said he could have squeezed off the extra poundage had it been a title fight but really that only made it more cynical.", + "length": 122 + }, + { + "text": "By his own admission, Routledge is not a writer but this memoir adds a different dimension to the Ali story and is worth the read.", + "length": 130 + }, + { + "text": "That relationship offers a touching insight into the caring side of Ali’s nature and now Routledge has put pen to paper for a new book.", + "length": 137 + }, + { + "text": "The pair kept in touch and three years later Ali not only invited Routledge to meet him in Los Angeles but invited him to stay at his mansion.", + "length": 142 + }, + { + "text": "It is a fine line which the referees have to tread but, in principle, better to stop it a mite early than end up in hospital later that night.", + "length": 142 + }, + { + "text": "That would appear to put Frampton just one victory away from his first world title bid, possibly at a sold-out stadium in Belfast in September.", + "length": 143 + }, + 
{ + "text": "Saul ‘Canelo’ Alvarez served up a timely reminder that no boxer should be consigned to the slops bucket just because he loses the odd fight.", + "length": 144 + }, + { + "text": "Just as the British Boxing Board of Control backed their official Howard Foster, so the Nevada State Athletic Commission is defending Tony Weeks.", + "length": 145 + }, + { + "text": "In an echo of Froch-Groves 1, the referee was booed for a tenth round stoppage which many in the crowd at the MGM Grand Garden arena deemed premature.", + "length": 150 + }, + { + "text": "Floyd Mayweather has confirmed how wise Amir Khan is not to fall for the bait of appearing on the great man’s undercard after being dumped from the big fight itself.", + "length": 167 + }, + { + "text": "’ Mayweather had tried to goad Khan into fighting his protegee Adrien Broner that night, hinting that he would give our young man from Bolton his big-bucks shot if he won.", + "length": 173 + }, + { + "text": "Although Alvarez was well behind on the cards, in contrast to George Groves who was leading Carl Froch, the argument was similar: Do we want one punch too few, or one punch too many?", + "length": 182 + }, + { + "text": "While that would have boosted Mayweather’s pay-per-view revenue, Khan nursed serious doubts that he would be Floyd Jnr’s next Chosen One come September, win or lose against Broner.", + "length": 184 + }, + { + "text": "The Greatest never forgot the courtesy and attention he received from Russell Routledge when he visited Tyneside to have his marriage to former wife Veronica blessed at a local mosque.", + "length": 184 + }, + { + "text": "If, next month, Manny Pacquiao avenges that scandalous decision given to Tim Bradley the pressure will crank up yet again for the long-deferred super-fight between the PacMan and the Money Man.", + "length": 193 + }, + { + "text": "The great boxers of the past, with such rare exceptions as Rocky Marciano and Joe Calzaghe, had defeats on their records, 
Muhammad Ali , Joe Louis and the two Sugar Rays, Robinson and Leonard, among them.", + "length": 204 + }, + { + "text": "At his first media conference since announcing Marcos Maidana as his opponent in Las Vegas on May 3, the world’s pound-for-pound king has made this very clear: ‘I choose who I want to fight, when I want.", + "length": 207 + }, + { + "text": "Leo Santa Cruz, after impressively retaining his WBC super-bantamweight crown against former champion Cristian Mijares in Las Vegas on Saturday night, pronounced himself eager to do the same against Frampton.", + "length": 208 + }, + { + "text": "That privilege has been hard won but by hand-picking the likes of Maidana – whose rough-hewn style is likely to suit him – he risks accusations of evading rivals who might have the speed to pose a different problem.", + "length": 219 + }, + { + "text": "Clash: Carl Frampton (left) could be just one fight away from a crack at WBC champion Leo Santa Cruz (right) When Muhammad Ali came to Newcastle in 1977 he struck up a friendship with a Geordie fan which endures to this day.", + "length": 224 + }, + { + "text": "Carl Frampton – anointed by his coach the great Barry McGuigan as his successor to the throne of Ulster boxing – has been given added incentive to win his world title eliminator against Hugo Cazares in Belfast next month.", + "length": 225 + }, + { + "text": "Alvarez, having been bewildered last September by Floyd Mayweather’s most recent master class, resurfaced in Las Vegas with a dynamic performance against a fellow Mexican considered so dangerous that he revels in the nickname El Perro – The Dog.", + "length": 249 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.40353381633758545 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:54.211390578Z", + "first_section_created": "2025-12-23T09:35:54.21169869Z", + "last_section_published": "2025-12-23T09:35:54.212015402Z", + "all_results_received": 
"2025-12-23T09:35:54.295085711Z", + "output_generated": "2025-12-23T09:35:54.295330321Z", + "total_processing_time_ms": 83, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 83, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:54.21169869Z", + "publish_time": "2025-12-23T09:35:54.211921699Z", + "first_worker_start": "2025-12-23T09:35:54.212432118Z", + "last_worker_end": "2025-12-23T09:35:54.294382Z", + "total_journey_time_ms": 82, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:54.212538423Z", + "start_time": "2025-12-23T09:35:54.212612725Z", + "end_time": "2025-12-23T09:35:54.212702329Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:54.212999Z", + "start_time": "2025-12-23T09:35:54.213146Z", + "end_time": "2025-12-23T09:35:54.294382Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 81 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:54.212373516Z", + "start_time": "2025-12-23T09:35:54.212432118Z", + "end_time": "2025-12-23T09:35:54.212528922Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:54.212533122Z", + "start_time": "2025-12-23T09:35:54.212610625Z", + "end_time": "2025-12-23T09:35:54.212669828Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:54.2119588Z", + "publish_time": "2025-12-23T09:35:54.212015402Z", + "first_worker_start": "2025-12-23T09:35:54.212620726Z", + "last_worker_end": "2025-12-23T09:35:54.290092Z", + "total_journey_time_ms": 78, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:54.212701429Z", + "start_time": "2025-12-23T09:35:54.21273973Z", + 
"end_time": "2025-12-23T09:35:54.212787232Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:54.212922Z", + "start_time": "2025-12-23T09:35:54.213049Z", + "end_time": "2025-12-23T09:35:54.290092Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 77 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:54.212560323Z", + "start_time": "2025-12-23T09:35:54.212620726Z", + "end_time": "2025-12-23T09:35:54.212689028Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:54.212651527Z", + "start_time": "2025-12-23T09:35:54.212690828Z", + "end_time": "2025-12-23T09:35:54.212711529Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 158, + "min_processing_ms": 77, + "max_processing_ms": 81, + "avg_processing_ms": 79, + "median_processing_ms": 81, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3462, + 
"slowest_section_id": 0, + "slowest_section_time_ms": 82 + } +} diff --git a/data/output/0068a4b536d5c968ae74540d550a6adacee6a8f9.json b/data/output/0068a4b536d5c968ae74540d550a6adacee6a8f9.json new file mode 100644 index 0000000..1f81c7f --- /dev/null +++ b/data/output/0068a4b536d5c968ae74540d550a6adacee6a8f9.json @@ -0,0 +1,528 @@ +{ + "file_name": "0068a4b536d5c968ae74540d550a6adacee6a8f9.txt", + "total_words": 1297, + "top_n_words": [ + { + "word": "the", + "count": 87 + }, + { + "word": "to", + "count": 39 + }, + { + "word": "of", + "count": 28 + }, + { + "word": "said", + "count": 26 + }, + { + "word": "in", + "count": 25 + }, + { + "word": "that", + "count": 22 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "river", + "count": 19 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "is", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "The U.", + "length": 6 + }, + { + "text": "5 feet.", + "length": 7 + }, + { + "text": "5 feet -- 12.", + "length": 13 + }, + { + "text": "Nonetheless, 1.", + "length": 15 + }, + { + "text": "(CNN) -- The U.", + "length": 15 + }, + { + "text": "Bobby Jindal said Friday.", + "length": 25 + }, + { + "text": "6 feet in Taft on May 23.", + "length": 25 + }, + { + "text": "However, Scott Welchel, a St.", + "length": 29 + }, + { + "text": "Womack talks about flood costs .", + "length": 32 + }, + { + "text": "5 million cubic feet per second.", + "length": 32 + }, + { + "text": "\"They say bad luck comes in threes.", + "length": 35 + }, + { + "text": "The river was expected to crest at 57.", + "length": 38 + }, + { + "text": "Residents gawk at Mississippi's rise .", + "length": 38 + }, + { + "text": "\"Now is the time to take action,\" he said.", + "length": 42 + }, + { + "text": "Stars talk about the flooding and response .", + "length": 44 + }, + { + "text": "I hope this is the end of it,\" Moore told WPTY.", + "length": 47 + }, 
+ { + "text": "They advised residents to expect road closings.", + "length": 47 + }, + { + "text": "for corn, wheat, rice and cotton, officials said.", + "length": 49 + }, + { + "text": "The Mississippi River is expected to crest at 26.", + "length": 49 + }, + { + "text": "It is expected to crest May 23 at more than 19 feet.", + "length": 52 + }, + { + "text": "NBA's Grizzlies inspired by fans in flooded Memphis .", + "length": 53 + }, + { + "text": "The Morganza Spillway has not been opened since 1973.", + "length": 53 + }, + { + "text": "A collective gasp as Louisiana town braces for flood .", + "length": 54 + }, + { + "text": "8 feet in New Orleans, just a fraction below flood stage.", + "length": 57 + }, + { + "text": "Jindal urged southeastern Louisiana residents to evacuate.", + "length": 58 + }, + { + "text": "Residents near Vicksburg counted on a levee for protection.", + "length": 59 + }, + { + "text": "\"It would impact every industry along the river,\" Welchel said.", + "length": 63 + }, + { + "text": "The Millington resident said he lost a house to a fire in 2009.", + "length": 63 + }, + { + "text": "Meanwhile, in the Arkansas town of Helena, the river crested at 56.", + "length": 67 + }, + { + "text": "5 feet above flood stage, according to the National Weather Service.", + "length": 68 + }, + { + "text": "15 feet above flood stage, according to the National Weather Service.", + "length": 69 + }, + { + "text": "That will mean a total of 264 bays will be open in the 350-bay spillway.", + "length": 72 + }, + { + "text": "Armstrong said he expected higher water Friday, with more homes affected.", + "length": 73 + }, + { + "text": "CNN's Mariano Castillo, Mia Aquino and Erica Henry contributed to this report.", + "length": 78 + }, + { + "text": "The National Weather Service said that as of Friday morning, the river was at 16.", + "length": 81 + }, + { + "text": "\"We'll do what we've got to do and keep praying,\" Moore said, holding back tears.", + 
"length": 81 + }, + { + "text": "Seven parishes are expected to be affected by the opening, according to the Corps.", + "length": 82 + }, + { + "text": "The strategy in Morgan City, officials say, is to reinforce the levees around the city.", + "length": 87 + }, + { + "text": "More than two dozen roads were closed and about 45 businesses will be closed by Friday.", + "length": 87 + }, + { + "text": "The New Orleans levees are built to withstand 20 feet, according to the weather service.", + "length": 88 + }, + { + "text": "Homes that were built between the levee and the Mississippi River were the first affected.", + "length": 90 + }, + { + "text": "\"Really, we're just waiting,\" said Evie Bertaut, who has lived in Morgan City for 50 years.", + "length": 91 + }, + { + "text": "The river's slow pace has given emergency responders more time to prepare, forecasters said.", + "length": 92 + }, + { + "text": "However, those belongings were destroyed when his storage unit was flooded several days ago.", + "length": 92 + }, + { + "text": "If it reaches 27 feet, officials told WGNO, the plant's water intake system could shut down.", + "length": 92 + }, + { + "text": "Army Corps of Engineers could open the Morganza Spillway as early as Saturday, Louisiana Gov.", + "length": 93 + }, + { + "text": "\"That's just something that isn't easy for people to deal with, especially on a moment's notice.", + "length": 96 + }, + { + "text": "Projections indicate the tipping point could be hit as early as Saturday evening, Jindal has said.", + "length": 98 + }, + { + "text": "Flood stage at Vicksburg, the level at which the river may begin flowing over its banks, is 43 feet.", + "length": 100 + }, + { + "text": "The backwater levee was being \"armored\" by a heavy plastic coating to prevent it from washing out, he said.", + "length": 107 + }, + { + "text": "That's where efforts were being focused Friday, rather than on handing out sandbags to individual residents.", + "length": 108 + }, 
+ { + "text": "Some officials said Thursday that spillover effects resulting from the flood could threaten other industries.", + "length": 109 + }, + { + "text": "\"Most people are getting their photographs together, things that you can't replace in case you have to go,\" she said.", + "length": 117 + }, + { + "text": "Coast Guard said floodwaters could close the Mississippi River to ships at the New Orleans port as early as Monday morning.", + "length": 123 + }, + { + "text": "Upriver in Vicksburg, Mississippi, Police Chief Walter Armstrong said 600 residents had been evacuated as of Thursday night.", + "length": 124 + }, + { + "text": "Officials believe that the levees will protect the city from flooding, but some are taking preliminary precautions, she said.", + "length": 125 + }, + { + "text": "That level is expected to be reached Monday, said Charlie Tindall, attorney for the Board of Mississippi Levee Commissioners.", + "length": 125 + }, + { + "text": "Carl Rhode of Entergy, the plant's operator, told WGNO that the threat to the intake system is not a matter of nuclear safety.", + "length": 126 + }, + { + "text": "Other farmers in Mississippi, Missouri, Tennessee and Arkansas rushed to salvage what wheat they could ahead of the rising water.", + "length": 129 + }, + { + "text": "Charles Parish Emergency Operations Center official, said shutting down the plant would have a \"domino effect\" on local industries.", + "length": 131 + }, + { + "text": "\" For residents in communities along the river, the damage has been far more devastating than can be measured in dollars and cents.", + "length": 131 + }, + { + "text": "The Mississippi River Commission has directed the Corps to operate the crucial spillway once river flows reach a certain trigger: 1.", + "length": 132 + }, + { + "text": "The Mississippi River Commission has advised a \"slow opening,\" and the flood would spread gradually over several days, the Corps said.", + "length": 134 + }, + { + "text": "But 
while the slow-moving water gives residents extra time to get ready, it also means that land could remain under water for some time.", + "length": 136 + }, + { + "text": "Moore said that after a flood destroyed all of his furniture last year, he decided to move everything he owned into rented storage space.", + "length": 137 + }, + { + "text": "That includes the possibility that the Waterford 3 nuclear power plant in Taft, Louisiana, could be closed, according to CNN affiliate WGNO.", + "length": 140 + }, + { + "text": "The National Guard worked around the clock to construct a flood barrier in Morgan City, Louisiana, where the Atchafalaya River was already 3.", + "length": 141 + }, + { + "text": "4 million acres in Mississippi, including 602,000 acres where crops are growing, could flood, said Rickey Grey of the state's Department of Agriculture.", + "length": 152 + }, + { + "text": "Moore said he is too preoccupied with taking care of his girlfriend, who is suffering from an infection that is damaging her liver, to look for new furniture.", + "length": 158 + }, + { + "text": "\"It's in about 10 feet of water,\" Dyersburg, Tennessee, farmer Jimmy Moody said of his 440 acres of winter wheat, which was to be harvested in the coming month.", + "length": 160 + }, + { + "text": "At Sacred Heart Church, where Bertaut works, people spent the day moving important documents such a baptismal, marriage and financial records to the second floor.", + "length": 162 + }, + { + "text": "The backwater levee is designed to keep water from backing into the Yazoo River delta and is designed lower than the mainline levee so that water can flow over it.", + "length": 163 + }, + { + "text": "In addition to the mainline levee along the river, starting near Vicksburg and extending northeast for more than 20 miles, a so-called backwater levee offers shelter.", + "length": 166 + }, + { + "text": "In Arkansas, the Farm Bureau estimated that damage to the state's agriculture could top more 
than $500 million as more than 1 million acres of cropland are under water.", + "length": 168 + }, + { + "text": "Danny Moore of Millington, Tennessee, told CNN affiliate WPTY that the recent disaster marked the second time in one year that flooding took away nearly everything he had.", + "length": 171 + }, + { + "text": "As for corn, farmers who were able to get into the fields during a soggy planting season in late March and April are seeing their crops in some cases under several feet of water.", + "length": 178 + }, + { + "text": "Louisiana state and local officials braced for the possibility of major flooding in the Atchafalaya River Basin if, or when, federal authorities open the spillway north of Baton Rouge.", + "length": 184 + }, + { + "text": "To help New Orleans, the Army Corps of Engineers said Friday that it will open 52 more bays at the Bonnet Carre Spillway just north of the city, diverting water into Lake Pontchartrain.", + "length": 185 + }, + { + "text": "Opening the spillway would lower anticipated cresting levels along the lower Mississippi River and divert water from Baton Rouge and New Orleans but would flood much of low-lying south-central Louisiana.", + "length": 203 + }, + { + "text": "\"We estimate that every home built on the river side of the levee from Memphis all the way to the Louisiana line is flooded,\" said Mike Womack, executive director of the Mississippi Emergency Management Agency.", + "length": 210 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5117793828248978 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:54.712802248Z", + "first_section_created": "2025-12-23T09:35:54.713196263Z", + "last_section_published": "2025-12-23T09:35:54.713570577Z", + "all_results_received": "2025-12-23T09:35:54.809896798Z", + "output_generated": "2025-12-23T09:35:54.810112507Z", + "total_processing_time_ms": 97, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 96, + 
"aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:54.713196263Z", + "publish_time": "2025-12-23T09:35:54.713472773Z", + "first_worker_start": "2025-12-23T09:35:54.714141799Z", + "last_worker_end": "2025-12-23T09:35:54.782106Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:54.714137299Z", + "start_time": "2025-12-23T09:35:54.714207702Z", + "end_time": "2025-12-23T09:35:54.714321206Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:54.71439Z", + "start_time": "2025-12-23T09:35:54.714544Z", + "end_time": "2025-12-23T09:35:54.782106Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:54.714054896Z", + "start_time": "2025-12-23T09:35:54.714141799Z", + "end_time": "2025-12-23T09:35:54.714243403Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:54.714091197Z", + "start_time": "2025-12-23T09:35:54.714180901Z", + "end_time": "2025-12-23T09:35:54.714226803Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:54.713512575Z", + "publish_time": "2025-12-23T09:35:54.713570577Z", + "first_worker_start": "2025-12-23T09:35:54.714196201Z", + "last_worker_end": "2025-12-23T09:35:54.809026Z", + "total_journey_time_ms": 95, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:54.714137299Z", + "start_time": "2025-12-23T09:35:54.714198701Z", + "end_time": "2025-12-23T09:35:54.714338207Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": 
"2025-12-23T09:35:54.714415Z", + "start_time": "2025-12-23T09:35:54.714552Z", + "end_time": "2025-12-23T09:35:54.809026Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 94 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:54.714192301Z", + "start_time": "2025-12-23T09:35:54.714238503Z", + "end_time": "2025-12-23T09:35:54.714305506Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:54.7141561Z", + "start_time": "2025-12-23T09:35:54.714196201Z", + "end_time": "2025-12-23T09:35:54.714216102Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 161, + "min_processing_ms": 67, + "max_processing_ms": 94, + "avg_processing_ms": 80, + "median_processing_ms": 94, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3823, + "slowest_section_id": 1, + "slowest_section_time_ms": 95 + } +} diff --git a/data/output/0068fddf156f31c05520970efb570e79a10dc96d.json 
b/data/output/0068fddf156f31c05520970efb570e79a10dc96d.json new file mode 100644 index 0000000..bd922d4 --- /dev/null +++ b/data/output/0068fddf156f31c05520970efb570e79a10dc96d.json @@ -0,0 +1,400 @@ +{ + "file_name": "0068fddf156f31c05520970efb570e79a10dc96d.txt", + "total_words": 957, + "top_n_words": [ + { + "word": "the", + "count": 47 + }, + { + "word": "to", + "count": 35 + }, + { + "word": "and", + "count": 26 + }, + { + "word": "a", + "count": 23 + }, + { + "word": "in", + "count": 22 + }, + { + "word": "you", + "count": 18 + }, + { + "word": "work", + "count": 17 + }, + { + "word": "that", + "count": 16 + }, + { + "word": "are", + "count": 11 + }, + { + "word": "have", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "When .", + "length": 6 + }, + { + "text": "She said: .", + "length": 11 + }, + { + "text": "proficient.", + "length": 11 + }, + { + "text": "That is life.", + "length": 13 + }, + { + "text": "He didn't riot.", + "length": 15 + }, + { + "text": "10 and £10 an hour.", + "length": 20 + }, + { + "text": "It is about working hard.", + "length": 25 + }, + { + "text": "‘It is about keeping going.", + "length": 29 + }, + { + "text": "You could be working at Costa.", + "length": 30 + }, + { + "text": "But the minister said many young .", + "length": 34 + }, + { + "text": "hard and are prepared to give it a go.", + "length": 38 + }, + { + "text": "‘People might say, “Where are the opportunities?", + "length": 52 + }, + { + "text": "all of a sudden you realise what skills you don’t have.", + "length": 57 + }, + { + "text": "realise the hours, the years [needed] to be able to do that job.", + "length": 64 + }, + { + "text": "only when they start do they realise it can take years to become .", + "length": 66 + }, + { + "text": "But the man nicknamed the 'Chingford Skinhead' did not quite say that.", + "length": 70 + }, + { + "text": "people have unrealistic expectations about what jobs entail, and it is .", + "length": 72 + }, + { + "text": 
"you see your first piece of work and compare it with who you want to be, .", + "length": 74 + }, + { + "text": "He got on his bike and looked for work, and he kept looking 'til he found it.", + "length": 77 + }, + { + "text": "You can start a business in your bedroom and have clients right around the world.", + "length": 81 + }, + { + "text": "‘Everybody says, “That’s what I want to do,” but I think you’ve got to .", + "length": 82 + }, + { + "text": "‘In the 18 to 24 year olds, more people are setting up businesses than ever before.", + "length": 85 + }, + { + "text": "She added: 'You are dealt the cards you are dealt and you have to make the best of that.", + "length": 88 + }, + { + "text": "A young Conservative had asked whether rioting was a 'natural reaction to unemployment'.", + "length": 88 + }, + { + "text": "Asked if they should be prepared to take ‘entry-level jobs’, she said: ‘Absolutely.", + "length": 89 + }, + { + "text": "Most famously Norman Tebbit has long been associated with the phrase 'Get on your bike and find a job'.", + "length": 103 + }, + { + "text": "Realistic: Esther McVey said the young should remember they must start at the bottom and work their way up .", + "length": 108 + }, + { + "text": "‘We have got to get our kids up to the right standard and aptitude so that everyone wants to take them on.", + "length": 108 + }, + { + "text": "’ Preparation: Miss McVey, 46, claims it is her mission to better prepare school leavers for the work place .", + "length": 111 + }, + { + "text": "The then-Employment Secretary told the Tory Party conference: 'I grew up in the thirties with an unemployed father.", + "length": 115 + }, + { + "text": "Employment ministers often court controversy with their instructions to the jobless to make more effort to find work.", + "length": 117 + }, + { + "text": "' The remarks sparked a political storm, with Mr Tebbit accused of being insensitive to the difficulties of looking for work.", + "length": 125 + }, + { + 
"text": "’ Last year, more than 1,700 people applied for eight jobs at a new Costa Coffee shop in Nottingham, which paid between £6.", + "length": 126 + }, + { + "text": "But in a couple of years’ time you might say, “I’d like to manage the area” or might even want to run a hotel in Dubai.", + "length": 127 + }, + { + "text": "Young people should be prepared to take lowly jobs in coffee shops if they want to get on in life, the employment minister has said.", + "length": 132 + }, + { + "text": "In 2010, Tory Work and Pensions Secretary Iain Duncan Smith sparked a similar furore when he suggested the unemployed ought to 'get on the bus'.", + "length": 144 + }, + { + "text": "But she insisted that those who want to work hard can succeed if they are prepared to learn the ropes and ‘be realistic’ about their abilities.", + "length": 147 + }, + { + "text": "The actual quote came in response to a question about urban riots which plagued places like Handsworth in Birmingham and Brixton in south London in 1981.", + "length": 153 + }, + { + "text": "Esther McVey said jobseekers need reminding that they have to start at the bottom and work their way up, rather than expecting to walk into their dream job.", + "length": 156 + }, + { + "text": "” But at the same time you could say, never have there been more opportunities when you look at the internet, where you can have a global reach with whatever you do.", + "length": 167 + }, + { + "text": "In an interview with the Mail, she admitted that young Britons are less prepared for the world of work than foreign migrants and need to learn the basics, such as turning up on time.", + "length": 182 + }, + { + "text": "The most recent figures revealed  that 941,000 people aged between 16 and 24 are out of work, while 282,000 under-25s have been jobless for a year or more, the highest level since 1993.", + "length": 186 + }, + { + "text": "Chris Grayling, her predecessor as employment minister, hit the headlines in 2011 when he branded 
it ‘unacceptable’ that high street chain Pret a Manger had shops staffed entirely by foreigners.", + "length": 198 + }, + { + "text": "’ The minister started her own working life  with a family firm on Merseyside after university and then made her name in television before setting up a business forum to help women start companies.", + "length": 200 + }, + { + "text": "Miss McVey, 46, vowed that tackling youth unemployment will be her ‘top priority’ but said that those looking for work have to be prepared to get a foot on the ladder before expanding their horizons.", + "length": 203 + }, + { + "text": "The Employment Minister insisted that ‘nine out of ten jobs’ are now going to Britons, but she conceded that school leavers in the UK are underprepared for work and less qualified than many migrants.", + "length": 203 + }, + { + "text": "He cited Merthyr Tydfil in Wales as somewhere where people had become ‘static’ and ‘didn’t know if they got on the bus an hour’s journey they’d be in Cardiff and they could look for the job there’.", + "length": 211 + }, + { + "text": "Asked whether young Britons can compete with foreign workers, Miss McVey said: ‘For a long time people have left school without the right qualifications even in English and maths, therefore they are at a huge disadvantage.", + "length": 224 + }, + { + "text": "The Government plans to expand its ‘sector based work academies’ in the retail and entertainment industries – a scheme where young jobless people get six weeks training, a work-experience placement and a guaranteed job interview.", + "length": 235 + }, + { + "text": "Miss McVey spoke out after Work and Pensions Secretary Iain Duncan Smith released figures earlier this week showing that the number of Britons in jobs plunged by 413,000 between 2005 and 2010, under Labour, while the number of working foreigners soared by 736,000.", + "length": 264 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4869006723165512 + }, + 
"name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:55.21433269Z", + "first_section_created": "2025-12-23T09:35:55.214731106Z", + "last_section_published": "2025-12-23T09:35:55.215003217Z", + "all_results_received": "2025-12-23T09:35:55.289164151Z", + "output_generated": "2025-12-23T09:35:55.289370759Z", + "total_processing_time_ms": 75, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 74, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:55.214731106Z", + "publish_time": "2025-12-23T09:35:55.214931514Z", + "first_worker_start": "2025-12-23T09:35:55.215291328Z", + "last_worker_end": "2025-12-23T09:35:55.288333Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:55.21557444Z", + "start_time": "2025-12-23T09:35:55.215609741Z", + "end_time": "2025-12-23T09:35:55.215688544Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:55.215858Z", + "start_time": "2025-12-23T09:35:55.215996Z", + "end_time": "2025-12-23T09:35:55.288333Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:55.215493536Z", + "start_time": "2025-12-23T09:35:55.215569639Z", + "end_time": "2025-12-23T09:35:55.215674444Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:55.215241526Z", + "start_time": "2025-12-23T09:35:55.215291328Z", + "end_time": "2025-12-23T09:35:55.21532693Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:55.214960415Z", + "publish_time": "2025-12-23T09:35:55.215003217Z", + "first_worker_start": "2025-12-23T09:35:55.215517237Z", + 
"last_worker_end": "2025-12-23T09:35:55.280261Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:55.21557384Z", + "start_time": "2025-12-23T09:35:55.215609541Z", + "end_time": "2025-12-23T09:35:55.215623541Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:55.215884Z", + "start_time": "2025-12-23T09:35:55.216008Z", + "end_time": "2025-12-23T09:35:55.280261Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:55.215546238Z", + "start_time": "2025-12-23T09:35:55.215624742Z", + "end_time": "2025-12-23T09:35:55.215644242Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:55.215470535Z", + "start_time": "2025-12-23T09:35:55.215517237Z", + "end_time": "2025-12-23T09:35:55.215526338Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 136, + "min_processing_ms": 64, + "max_processing_ms": 72, + "avg_processing_ms": 68, + "median_processing_ms": 72, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 
2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2714, + "slowest_section_id": 0, + "slowest_section_time_ms": 73 + } +} diff --git a/data/output/00691104b9ac66f1b040f07ba3fbeb8f16286ab0.json b/data/output/00691104b9ac66f1b040f07ba3fbeb8f16286ab0.json new file mode 100644 index 0000000..a0d6bee --- /dev/null +++ b/data/output/00691104b9ac66f1b040f07ba3fbeb8f16286ab0.json @@ -0,0 +1,326 @@ +{ + "file_name": "00691104b9ac66f1b040f07ba3fbeb8f16286ab0.txt", + "total_words": 474, + "top_n_words": [ + { + "word": "the", + "count": 22 + }, + { + "word": "was", + "count": 16 + }, + { + "word": "to", + "count": 14 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "s", + "count": 10 + }, + { + "word": "i", + "count": 8 + }, + { + "word": "sarah", + "count": 8 + }, + { + "word": "a", + "count": 7 + }, + { + "word": "couple", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "\".", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "'I .", + "length": 4 + }, + { + "text": "okay.", + "length": 5 + }, + { + "text": "'We .", + "length": 5 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Lizzie Parry .", + "length": 14 + }, + { + "text": "The next thing we .", + "length": 19 + }, + { + "text": "knew Charlie was out.", + "length": 21 + }, + { + "text": "05:02 EST, 26 October 2013 .", + "length": 28 + }, + { + "text": "It feels weird\" and when I .", + "length": 28 + }, + { + "text": "05:55 EST, 26 October 2013 .", + "length": 28 + }, + { + "text": "looked there was a head sticking out.", + "length": 37 + }, + { + "text": "He said: 'I was a little bit scared .", + "length": 37 + }, + { + "text": 
"then Sarah woke me saying \"what's that?", + "length": 39 + }, + { + "text": "what to do and to get towels and things.", + "length": 40 + }, + { + "text": "I said yes I'm sure, it's got hair on it.", + "length": 41 + }, + { + "text": "'She was saying \"are you sure it's a head?", + "length": 42 + }, + { + "text": "Midwives told the couple to return in 24 hours .", + "length": 48 + }, + { + "text": "The DJ said: 'We had a couple of hours sleep and .", + "length": 50 + }, + { + "text": "we had plenty of time and they would be here soon.", + "length": 50 + }, + { + "text": "phoned the midwife who told us to ring for an ambulance and while we .", + "length": 70 + }, + { + "text": "were waiting a woman on the phone was talking me through everything, .", + "length": 70 + }, + { + "text": "But midwives, having checked her over, told them to return in 24 hours.", + "length": 71 + }, + { + "text": "but your instincts kick in, I didn't actually do a lot, Sarah did all .", + "length": 71 + }, + { + "text": "kept wondering where the ambulance was and the woman on the phone said .", + "length": 72 + }, + { + "text": "the work I just cleaned him up when he came out made sure the cord was .", + "length": 72 + }, + { + "text": "The couple had visited Barnsley Hospital just hours earlier when Sarah's waters broke.", + "length": 86 + }, + { + "text": "He said despite his initial fears, the birth which took about half-an-hour, went well.", + "length": 86 + }, + { + "text": "Mr Smith said praised Miss Pearson for her bravery in giving birth without any pain relief.", + "length": 91 + }, + { + "text": "In anticipation of the arrival of their firstborn, the couple headed home to get some sleep.", + "length": 92 + }, + { + "text": "'Sarah did really well and I'm very proud of her for doing it without pain relief or anything like that.", + "length": 104 + }, + { + "text": "' When paramedics arrived they found little Charlie, who weighed 6lbs 15oz, curled up in his mother's arms.", + 
"length": 107 + }, + { + "text": "But by the time the 25-year-old realised what was happening baby Charlie was ready to make his appearance in the world.", + "length": 119 + }, + { + "text": "Checking to see what the 'wierd feeling' Sarah was experiencing was, Mr Smith, 22, discovered he could see his son's head.", + "length": 122 + }, + { + "text": "Sarah Pearson woke at the couple's home in Barnsley on October 13, telling partner Graeme Smith she 'felt something weird'.", + "length": 123 + }, + { + "text": "But their nap was cut short when Sarah, an art student, woke to find she was already in labour and her baby's head was crowning.", + "length": 128 + }, + { + "text": "A new father was forced to deliver his baby son at home when his partner woke to discover she had gone into labour in her sleep.", + "length": 128 + }, + { + "text": "False alarm: Miss Pearson and Mr Smith went to Barnsley Hospital the day before Charlie arrived, after Miss Pearson's waters broke.", + "length": 131 + }, + { + "text": "Surprise arrival: Sarah Pearson woke up to find she was in the final stages of labour leaving partner Graeme Smith to deliver their new son Charlie at the couple's home .", + "length": 170 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4867260158061981 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:55.715725027Z", + "first_section_created": "2025-12-23T09:35:55.717882112Z", + "last_section_published": "2025-12-23T09:35:55.718050019Z", + "all_results_received": "2025-12-23T09:35:55.783955626Z", + "output_generated": "2025-12-23T09:35:55.784096332Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:55.717882112Z", + "publish_time": "2025-12-23T09:35:55.718050019Z", + "first_worker_start": "2025-12-23T09:35:55.718563739Z", + 
"last_worker_end": "2025-12-23T09:35:55.783117Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:55.718493036Z", + "start_time": "2025-12-23T09:35:55.718563739Z", + "end_time": "2025-12-23T09:35:55.718634542Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:55.718789Z", + "start_time": "2025-12-23T09:35:55.718939Z", + "end_time": "2025-12-23T09:35:55.783117Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:55.718502337Z", + "start_time": "2025-12-23T09:35:55.718567739Z", + "end_time": "2025-12-23T09:35:55.718626142Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:55.718536238Z", + "start_time": "2025-12-23T09:35:55.718611841Z", + "end_time": "2025-12-23T09:35:55.718661043Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 
1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2520, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/00697ea6b46a074bd1d94417297b8f9c95db4d31.json b/data/output/00697ea6b46a074bd1d94417297b8f9c95db4d31.json new file mode 100644 index 0000000..5d17844 --- /dev/null +++ b/data/output/00697ea6b46a074bd1d94417297b8f9c95db4d31.json @@ -0,0 +1,290 @@ +{ + "file_name": "00697ea6b46a074bd1d94417297b8f9c95db4d31.txt", + "total_words": 493, + "top_n_words": [ + { + "word": "the", + "count": 22 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "johnson", + "count": 14 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "his", + "count": 12 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "he", + "count": 11 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "s", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "I.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "Johnson was 89.", + "length": 15 + }, + { + "text": "\" Share your memories .", + "length": 23 + }, + { + "text": "He will truly be missed.", + "length": 24 + }, + { + "text": "\" Advice to young actors .", + "length": 26 + }, + { + "text": "Army Air Forces during World War II.", + "length": 36 + }, + { + "text": "He called Johnson's death \"unexpected.", + "length": 38 + }, + { + "text": "\" Before becoming an actor, Johnson served in the U.", + "length": 52 + }, + { + "text": "\"Preparation is everything, and that means studying.", + "length": 52 + }, + { + "text": "Bill benefits to pay for acting school after the war.", + "length": 53 + }, + { + "text": "Johnson's advice to young actors was to \"prepare yourself.", + "length": 58 + }, + { + "text": "Johnson is also 
survived by a stepson, Court, and a grandson, he said.", + "length": 70 + }, + { + "text": "He died at his home in Washington, where he lived with his wife, Connie.", + "length": 72 + }, + { + "text": "\" Another important ingredient to acting success is perseverance, he said.", + "length": 74 + }, + { + "text": "Johnson was in Ray Bradbury's 1953 sci-fi classic \"It Came From Outer Space.", + "length": 76 + }, + { + "text": "She and their daughter, Kimberly, were at his side, said agent Mike Eisenstadt.", + "length": 79 + }, + { + "text": "\" \"My prayers and condolences go out to his wife Constance and his family,\" Louise said.", + "length": 88 + }, + { + "text": "Johnson worked up until his death, signing autographs over the holidays, said Eisenstadt.", + "length": 89 + }, + { + "text": "His acting career began in the early 1950s with many jobs as a character actor on television.", + "length": 93 + }, + { + "text": "Johnson was \"just a positive and nice guy\" who always treated people with respect, his agent said.", + "length": 98 + }, + { + "text": "He played Marshal Gib Scott in two seasons of \"Black Saddle,\" a Western that ran in 1959 and 1960.", + "length": 98 + }, + { + "text": "\" The chief deputy coroner in Kitsap County, Washington, told CNN that Johnson died from natural causes.", + "length": 104 + }, + { + "text": "A noteworthy big screen role was as a nuclear physicist in the 1955 science fiction film \"This Island Earth.", + "length": 108 + }, + { + "text": "\"He will always be in our hearts and remembered from Gilligan's island as part of American pop culture history.", + "length": 111 + }, + { + "text": "Actor Russell Johnson, best known as Professor in the 1960s TV sitcom \"Gilligan's Island,\" died Thursday, his agent said.", + "length": 121 + }, + { + "text": "\"You can have all the talent in the world, but if you don't persevere, if you don't stick to it, it doesn't mean anything.", + "length": 122 + }, + { + "text": "\" \"Most of us have 
to really learn how to do what we do, and that takes some studying and being part of an acting group,\" he said.", + "length": 130 + }, + { + "text": "He was on a B-24 Liberator when it was shot down during a bombing raid over the Philippines in 1945, according to his official biography, and used his G.", + "length": 153 + }, + { + "text": "Johnson played the iconic role of Professor Roy Hinkley, whose scientific schemes to get the castaways rescued were always foiled by Gilligan's bumbling.", + "length": 153 + }, + { + "text": "Tina Louise, who played the glamorous Hollywood starlet Ginger on \"Gilligan's Island said she was \" very saddened to hear of the passing of Russell Johnson.", + "length": 156 + }, + { + "text": "Johnson acted in dozens of television shows after the four seasons on \"Gilligan's Island,\" but his career seemed stranded on its own island because of the popular sitcom role.", + "length": 175 + }, + { + "text": "Johnson, in a 2004 interview for the Archive of American Television said the success of \"Gilligan's Island, which he never expected to last more than the initial order of 13 episodes, was the result of the \"great chemistry\" of the cast.", + "length": 236 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.44092631340026855 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:56.218983437Z", + "first_section_created": "2025-12-23T09:35:56.220361892Z", + "last_section_published": "2025-12-23T09:35:56.220529999Z", + "all_results_received": "2025-12-23T09:35:56.286911125Z", + "output_generated": "2025-12-23T09:35:56.287073731Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:56.220361892Z", + "publish_time": "2025-12-23T09:35:56.220529999Z", + "first_worker_start": "2025-12-23T09:35:56.220978916Z", + 
"last_worker_end": "2025-12-23T09:35:56.285976Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:56.221010318Z", + "start_time": "2025-12-23T09:35:56.22107282Z", + "end_time": "2025-12-23T09:35:56.221126822Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:56.221234Z", + "start_time": "2025-12-23T09:35:56.221375Z", + "end_time": "2025-12-23T09:35:56.285976Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:56.221006917Z", + "start_time": "2025-12-23T09:35:56.22107192Z", + "end_time": "2025-12-23T09:35:56.221135122Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:56.220918314Z", + "start_time": "2025-12-23T09:35:56.220978916Z", + "end_time": "2025-12-23T09:35:56.221019518Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 
1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2814, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/0069880dd92b8a8737d13efb84c49002584ccab1.json b/data/output/0069880dd92b8a8737d13efb84c49002584ccab1.json new file mode 100644 index 0000000..7104536 --- /dev/null +++ b/data/output/0069880dd92b8a8737d13efb84c49002584ccab1.json @@ -0,0 +1,528 @@ +{ + "file_name": "0069880dd92b8a8737d13efb84c49002584ccab1.txt", + "total_words": 925, + "top_n_words": [ + { + "word": "the", + "count": 79 + }, + { + "word": "to", + "count": 37 + }, + { + "word": "of", + "count": 30 + }, + { + "word": "in", + "count": 22 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "s", + "count": 14 + }, + { + "word": "that", + "count": 14 + }, + { + "word": "iran", + "count": 13 + }, + { + "word": "be", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "A .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Mr .", + "length": 4 + }, + { + "text": "of U.", + "length": 5 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "The U.", + "length": 6 + }, + { + "text": "'Any .", + "length": 6 + }, + { + "text": "'We’ve .", + "length": 10 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "Thousands .", + "length": 11 + }, + { + "text": "military edge.", + "length": 14 + }, + { + "text": "Under threat: U.", + "length": 16 + }, + { + "text": "European 
Command .", + "length": 18 + }, + { + "text": "should remain open.", + "length": 19 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": ", which is designed to .", + "length": 24 + }, + { + "text": "counterpart, Leon Panetta.", + "length": 26 + }, + { + "text": "01:54 EST, 6 January 2012 .", + "length": 27 + }, + { + "text": "ever held by the two countries.", + "length": 31 + }, + { + "text": "region and indeed of the world.", + "length": 31 + }, + { + "text": "Then in a television interview, Mr .", + "length": 36 + }, + { + "text": "He said: ‘We are an integrated part .", + "length": 39 + }, + { + "text": "Israel has deployed the Arrow system, .", + "length": 39 + }, + { + "text": "jointly developed and funded with the U.", + "length": 40 + }, + { + "text": "statement from the Israeli military said: ‘U.", + "length": 47 + }, + { + "text": "UK would not allow the Strait of Hormuz to be closed.", + "length": 53 + }, + { + "text": "defence systems against incoming missiles and rockets.", + "length": 54 + }, + { + "text": "taskforce is to ensure that those shipping lanes remain open.", + "length": 61 + }, + { + "text": "miscalculation by the Iranians about the importance that the .", + "length": 62 + }, + { + "text": "and Israeli soldiers will take part as they test multiple air .", + "length": 63 + }, + { + "text": "He signalled that such action would be blocked by force of arms.", + "length": 64 + }, + { + "text": "forces in staging major missile defence exercises in the region.", + "length": 64 + }, + { + "text": "’ Mr Hammond was in Washington for his first meeting with his U.", + "length": 66 + }, + { + "text": "of the naval taskforce in the Gulf and one of the missions of that .", + "length": 68 + }, + { + "text": "intercept Iranian missiles in the stratosphere, far from the country.", + "length": 69 + }, + { + "text": "for their own security and, in the case of Israel, their qualitative .", + "length": 70 + }, + { + "text": 
"international community attaches to keeping the Strait of Hormuz open.", + "length": 70 + }, + { + "text": "drills, called Austere Challenge 12, are designed to improve defence .", + "length": 70 + }, + { + "text": "systems and co-operation between the forces and would be the largest .", + "length": 70 + }, + { + "text": "there would be very significant consequences for the economies of the .", + "length": 71 + }, + { + "text": "heard these kinds of threats from Iran before, but there should be no .", + "length": 71 + }, + { + "text": "attempt to close the Strait of Hormuz would be illegal and we need to .", + "length": 71 + }, + { + "text": "Hammond said: ‘Very clearly the Strait of Hormuz is one of the great .", + "length": 72 + }, + { + "text": "commercial arteries of the world – it must remain open and flowing, or .", + "length": 74 + }, + { + "text": "send a very clear message to Iran that we are determined that the Strait .", + "length": 74 + }, + { + "text": "The move could clash with a plan by Israel’s military to gear up with U.", + "length": 74 + }, + { + "text": "routinely works with partner nations to ensure their capacity to provide .", + "length": 74 + }, + { + "text": "Hammond said he wanted to send a ‘very clear message to Iran’ that the .", + "length": 76 + }, + { + "text": "warships carrying 17,000 personnel enter the Gulf through the Strait of Hormuz .", + "length": 80 + }, + { + "text": "Base: The 'mothership' is most likely to be located in the Persian Gulf, pictured .", + "length": 83 + }, + { + "text": "UK warns that it is prepared to use force to stop Iran blocking the Strait of Hormuz .", + "length": 86 + }, + { + "text": "They held talks at the Pentagon yesterday on Afghanistan, Iran and other military matters.", + "length": 90 + }, + { + "text": "The planned embargo is an attempt to persuade Iran to abandon plans to develop a nuclear weapon.", + "length": 96 + }, + { + "text": "He spoke out as President Obama yesterday rolled out a new 
defence strategy that will shrink the U.", + "length": 99 + }, + { + "text": "armed forces, but pledged to maintain the country’s position as the world’s dominant military power.", + "length": 104 + }, + { + "text": "The Royal Navy operates mine clearance vessels in the Gulf as part of a joint taskforce based in Bahrain.", + "length": 105 + }, + { + "text": "sustain, in the long-term, the military capability required to project power and maintain defence,’ he said.", + "length": 110 + }, + { + "text": "General Sir David Richards, the Chief of the Defence Staff, said ‘no country can defend itself if bankrupt’.", + "length": 112 + }, + { + "text": "Iran has threatened to block the 34-mile wide strait in retaliation for a planned EU trade embargo on Iranian oil.", + "length": 114 + }, + { + "text": "’ Threat: Iranian television broadcast this video of a missile being launched from an unknown location on Sunday .", + "length": 116 + }, + { + "text": "‘Any attempt by Iran to do this would be illegal and unsuccessful,’ he said in a speech at the Atlantic Council.", + "length": 116 + }, + { + "text": "Mr Hammond said countries would have to work together more to reduce the ‘astronomical’ costs of modern warfare.", + "length": 116 + }, + { + "text": "‘That is why today the debt crisis should be considered the greatest strategic threat to the future security of our nations.", + "length": 126 + }, + { + "text": "Iran announced last night that an annual naval exercise known as 'The Great Prophet' would take place in the strait next month.", + "length": 127 + }, + { + "text": "In his speech, Mr Hammond also pointed to the economic crisis as the most serious threat to national and international security.", + "length": 128 + }, + { + "text": "’ More than 15million barrels of oil pass through the narrow stretch of water between Iran and the United Arab Emirates every day.", + "length": 132 + }, + { + "text": "’ Last month, the head of the British armed forces said the biggest 
strategic risk facing the UK was economic rather than military.", + "length": 133 + }, + { + "text": "Mr Hammond used a speech in Washington to warn Iran that any move to close the key Gulf trade route would be opposed by the Royal Navy.", + "length": 135 + }, + { + "text": "and Israel are planning a massive military exercise in the Persian Gulf in an attempt to face down Iran over its sabre-rattling in the region.", + "length": 142 + }, + { + "text": "Admiral Ali Fadavi, naval commander of the country's Revolutionary Guards, said the drill would be 'different' to previous years, but did not specify how.", + "length": 154 + }, + { + "text": "He has already earmarked budget cuts of $489billion (£315billion) over ten years as the pace of spending slows more than a decade after the September 11, 2001, attacks.", + "length": 169 + }, + { + "text": "Britain's Defence Secretary yesterday said that the UK would take military action to prevent Iran - which is believed to be developing secret nuclear weapons - from cutting off the strait.", + "length": 188 + }, + { + "text": "Philip Hammond, the UK's Defence Secretary, warned the regime in Tehran that Britain will not tolerate the ‘very significant consequences’ if it fulfils a threat to block the Strait of Hormuz.", + "length": 196 + }, + { + "text": "But the Islamic Republic is also preparing war games in the Strait of Hormuz, a strategic oil route which the country has threatened to block off in a move which could seriously restrict energy supplies.", + "length": 203 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6500030755996704 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:56.721273609Z", + "first_section_created": "2025-12-23T09:35:56.721640924Z", + "last_section_published": "2025-12-23T09:35:56.721935136Z", + "all_results_received": "2025-12-23T09:35:56.808273251Z", + "output_generated": "2025-12-23T09:35:56.808437458Z", + "total_processing_time_ms": 
87, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 86, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:56.721640924Z", + "publish_time": "2025-12-23T09:35:56.721882934Z", + "first_worker_start": "2025-12-23T09:35:56.722403154Z", + "last_worker_end": "2025-12-23T09:35:56.794639Z", + "total_journey_time_ms": 72, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:56.722474557Z", + "start_time": "2025-12-23T09:35:56.72255256Z", + "end_time": "2025-12-23T09:35:56.722667865Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:56.72262Z", + "start_time": "2025-12-23T09:35:56.722777Z", + "end_time": "2025-12-23T09:35:56.794639Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 71 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:56.722421355Z", + "start_time": "2025-12-23T09:35:56.722491458Z", + "end_time": "2025-12-23T09:35:56.722604762Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:56.722344152Z", + "start_time": "2025-12-23T09:35:56.722403154Z", + "end_time": "2025-12-23T09:35:56.722441456Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:56.721902034Z", + "publish_time": "2025-12-23T09:35:56.721935136Z", + "first_worker_start": "2025-12-23T09:35:56.722392354Z", + "last_worker_end": "2025-12-23T09:35:56.807391Z", + "total_journey_time_ms": 85, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:56.722600362Z", + "start_time": "2025-12-23T09:35:56.722651864Z", + "end_time": "2025-12-23T09:35:56.722665364Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": 
{ + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:56.722693Z", + "start_time": "2025-12-23T09:35:56.722805Z", + "end_time": "2025-12-23T09:35:56.807391Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 84 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:56.722421355Z", + "start_time": "2025-12-23T09:35:56.722471757Z", + "end_time": "2025-12-23T09:35:56.722508058Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:56.722350752Z", + "start_time": "2025-12-23T09:35:56.722392354Z", + "end_time": "2025-12-23T09:35:56.722404654Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 155, + "min_processing_ms": 71, + "max_processing_ms": 84, + "avg_processing_ms": 77, + "median_processing_ms": 84, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2740, + "slowest_section_id": 1, + "slowest_section_time_ms": 85 + } +} diff --git 
a/data/output/0069949dbb6fa31e3ce0870773f1c187bf5f493d.json b/data/output/0069949dbb6fa31e3ce0870773f1c187bf5f493d.json new file mode 100644 index 0000000..67aff48 --- /dev/null +++ b/data/output/0069949dbb6fa31e3ce0870773f1c187bf5f493d.json @@ -0,0 +1,516 @@ +{ + "file_name": "0069949dbb6fa31e3ce0870773f1c187bf5f493d.txt", + "total_words": 1370, + "top_n_words": [ + { + "word": "the", + "count": 78 + }, + { + "word": "a", + "count": 45 + }, + { + "word": "of", + "count": 38 + }, + { + "word": "to", + "count": 30 + }, + { + "word": "and", + "count": 26 + }, + { + "word": "in", + "count": 25 + }, + { + "word": "it", + "count": 24 + }, + { + "word": "i", + "count": 16 + }, + { + "word": "on", + "count": 16 + }, + { + "word": "bird", + "count": 15 + } + ], + "sorted_sentences": [ + { + "text": "”.", + "length": 4 + }, + { + "text": "club.", + "length": 5 + }, + { + "text": "Has he .", + "length": 8 + }, + { + "text": "the sand.", + "length": 9 + }, + { + "text": "‘So that’s .", + "length": 16 + }, + { + "text": "‘It’s good fun.", + "length": 19 + }, + { + "text": "The hunt closes in.", + "length": 19 + }, + { + "text": "Is he a little scared?", + "length": 22 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "across the 11th fairway.", + "length": 24 + }, + { + "text": "what it was,’ he says.", + "length": 24 + }, + { + "text": "I meet greenkeeper Carl Edwards.", + "length": 32 + }, + { + "text": "He said: 'You can see the bird .", + "length": 32 + }, + { + "text": "‘I’m not worried,’ says Harry.", + "length": 36 + }, + { + "text": "Barkway Park is a delightful little .", + "length": 37 + }, + { + "text": "‘I hope people just leave it alone.", + "length": 37 + }, + { + "text": "Then other people started saying the same.", + "length": 42 + }, + { + "text": "‘Anthony just said: “There’s that emu!", + "length": 44 + }, + { + "text": "'It was about six feet high with its head up.", + "length": 45 + }, + { + "text": "‘I did catch a glimpse of 
something running .", + "length": 47 + }, + { + "text": "And last night, the jokes took a turn for the worse.", + "length": 52 + }, + { + "text": "She first learned of the creature from her sister-in-law.", + "length": 57 + }, + { + "text": "There is no great desire to see her returned to captivity.", + "length": 58 + }, + { + "text": "seen a grey/brown South American bird, about 5ft 8in tall?", + "length": 58 + }, + { + "text": "I fear I can see an Essex girl joke coming already … .", + "length": 58 + }, + { + "text": "At the clubhouse, Anthony Smith has just finished a round.", + "length": 58 + }, + { + "text": "In fact, everyone seems to wish ‘Chris’ the best of luck.", + "length": 61 + }, + { + "text": "‘His dad once had a picture published in the Bath Chronicle.", + "length": 62 + }, + { + "text": "So why should a wild South American bird take a fancy to golf?", + "length": 62 + }, + { + "text": "We all thought she must have concussion and told her to lie down.", + "length": 65 + }, + { + "text": "They are also incredibly hard to catch because of their fast speeds .", + "length": 69 + }, + { + "text": "standing in a couple of the shots, in the others it was grubbing around .", + "length": 73 + }, + { + "text": "in the rape field for food, or perhaps it was trying to bury its head in .", + "length": 74 + }, + { + "text": "’ Really hard to spot: The six-foot bird has been on the run for a month.", + "length": 75 + }, + { + "text": "Certainly, as I drop in on the abandoned trio, they all seem perfectly happy.", + "length": 77 + }, + { + "text": "It’ll be fine out there and there’s plenty for it to eat,’ says Di Pyper.", + "length": 79 + }, + { + "text": "Sure enough, there is ‘Chris’, snooping around the western edge of the course.", + "length": 82 + }, + { + "text": "‘She’d just fallen off her horse and came home talking about seeing an ostrich.", + "length": 83 + }, + { + "text": "‘We’re all looking out for it now,’ says Tim’s wife, Madeleine, a piano 
teacher.", + "length": 88 + }, + { + "text": "As I rapidly discover, though, we’re not exactly talking Beast of Bodmin, let alone Jaws.", + "length": 91 + }, + { + "text": "‘It’s funny because my husband’s always been a keen photographer and so has his father.", + "length": 93 + }, + { + "text": "Disappeared: The rhea escaped from a smallholding in Brent Pelham on the Essex/Hertfordshire border .", + "length": 101 + }, + { + "text": "In recent days, however, there have been sporadic sightings of Mum across a five-mile radius hereabouts.", + "length": 104 + }, + { + "text": "' Big bird: The runaway rhea was also photographed near Brent Pelham on Thursday by cyclist Ray Murdoch .", + "length": 105 + }, + { + "text": "’ Son Harry, ten, is determined to go one better and has been out trying to capture some video footage.", + "length": 105 + }, + { + "text": "heard reports of sightings five miles away, near the 12th fairway at Barkway Park Golf Club, and off we go.", + "length": 107 + }, + { + "text": "‘I couldn’t believe it when I saw it,’ she says, ‘but I’m not sure I’d want to get too close to it.", + "length": 111 + }, + { + "text": "It was about my height and minding its own business so we started filming,’ says Carl, showing me the result.", + "length": 111 + }, + { + "text": "She certainly doesn’t seem to pose a threat and just ambles off into the bushes when the two men get too close.", + "length": 113 + }, + { + "text": "Mr Murdoch took these two pictures with his mobile phone after the bird trotted off into a field of rape seed oil .", + "length": 115 + }, + { + "text": "As the joint master of the local hunt, the Puckeridge, Di knows this terrain better than anyone and agrees to be my guide.", + "length": 122 + }, + { + "text": "No one is entirely sure why the bird escaped, although Jo thinks it may have been spooked by the local hunt passing nearby.", + "length": 123 + }, + { + "text": "Now, Tim’s gone one better with a picture in the national papers and 
we’ve even had Have I Got News For You on the phone!", + "length": 125 + }, + { + "text": "Since then, a media circus has descended, armed with lurid warnings about the  damage these birds can inflict when cornered.", + "length": 125 + }, + { + "text": "’ The locals have already named the bird Chris – after the singer, Chris Rea – despite the fact that this rhea is a she.", + "length": 126 + }, + { + "text": "’ By last night, word of our progress had spread and the media circus had started to descend on the fairways of Barkway Park.", + "length": 127 + }, + { + "text": "And by close of play yesterday evening, the ‘birdie’ gags were becoming unbearable in the 19th hole at Barkway Park Golf Club.", + "length": 130 + }, + { + "text": "Mind you, they might not be chortling quite so happily the next time they’re poking around in the rough for a missing ball … .", + "length": 132 + }, + { + "text": "In fact, as the hunt for the runaway rhea of Essex gathers momentum, they all seem to be treating the whole thing as a bit of a laugh.", + "length": 134 + }, + { + "text": "On the hunt: The Daily Mail's Robert Hardman goes in search of the missing rhea bird in the rape seed fields of Astey, Hertfordshire .", + "length": 134 + }, + { + "text": "Jo has kept rheas for many years – among her horses, sheep, geese, ducks and cats – in order to keep down the weeds on her 11 acres.", + "length": 136 + }, + { + "text": "He and his colleague, Anthony Lake, have not just seen the bird a few hours earlier, they have managed to capture it on their mobile phones.", + "length": 140 + }, + { + "text": "On Sunday, civil engineer Tim Bradshaw managed to take a charming shot of the animal in a field of rapeseed just outside the village of Anstey.", + "length": 143 + }, + { + "text": "Later on, Anthony’s wife, Charmaine, says that she spotted the bird while out with her two young children and grabbed a snap of it on her phone.", + "length": 146 + }, + { + "text": "For even if we have now 
narrowed down the chase, no one seems to think there is much chance of trapping this monster feather duster in the near future.", + "length": 151 + }, + { + "text": "After all, the RSPCA and the cops are warning people not to approach the bird, even though Jo Clark’s young brood wouldn’t say boo to one of her geese.", + "length": 155 + }, + { + "text": "’ In which case, it may not be too long before this bit of East Anglia starts to see rather more exotic flightless birds running wild and causing mayhem.", + "length": 155 + }, + { + "text": "It is nearly four weeks since the six-year-old female rhea went on the run from Jo Clark’s smallholding in Brent Pelham on the Essex/Hertfordshire border.", + "length": 156 + }, + { + "text": "It’s charming countryside, narrow lanes meandering through gently undulating crop-filled fields sprinkled with thatched cottages and handsome little churches.", + "length": 160 + }, + { + "text": "Jo Clark has another theory: ‘A few years ago, I did sell a breeding pair to a man near Barkway, and this one will be looking for a mate at this time of year.", + "length": 160 + }, + { + "text": "Latest sighting: Charmaine Lake captured this image of the missing rhea bird which can run at speeds of up to 40mph at Barkway Park Golf Club, near Nuthampstead, Hertfordshire .", + "length": 177 + }, + { + "text": "Since rheas are not remotely maternal (after laying their eggs, they leave the hatching and childcare to the males), it is unlikely that this one will be feeling any pangs of remorse.", + "length": 183 + }, + { + "text": "It is dashing across a nicely manicured fairway as purposefully as a club captain who has just spotted a non-member wearing inappropriate trousers and teeing off in the wrong direction.", + "length": 185 + }, + { + "text": "We’re only 35 miles from London and a short drive from Stansted Airport, but it’s a tranquil spot  with plenty of hedgerows and woods to provide cover for a furtive feathered giant.", + "length": 186 + 
}, + { + "text": "Spotted: A cyclist took this picture of the fugitive South American rhea - which stands at around six-foot tall - in countryside north east of Stevenage in Hertfordshire at the weekend .", + "length": 186 + }, + { + "text": "Given that this thing is the height of a man, can disembowel a human with one thwack of its three-pronged claw and weighs up to 90lb, the locals seem remarkably relaxed that it is on the loose.", + "length": 193 + }, + { + "text": "Because the Daily Mail has now tracked down this giant flightless bird – a 45mph ostrich-cum-emu usually found on South American pampas – to a golf course outside Nuthampstead, Hertfordshire.", + "length": 195 + }, + { + "text": "Tracked down: Despite the missing rhea bird being capable of killing a man with a single whack of its claw, locals at the Barkway Park Golf Club, near Nuthampstead, pictured, don't seem concerned .", + "length": 197 + }, + { + "text": "‘I followed her for four hours with a bucket of food, but you can’t herd these creatures and so I gave up,’ says Jo, who still has three remaining rheas, all one-year-old offspring of the runaway mum.", + "length": 206 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4784882068634033 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:57.222677046Z", + "first_section_created": "2025-12-23T09:35:57.223074962Z", + "last_section_published": "2025-12-23T09:35:57.223409575Z", + "all_results_received": "2025-12-23T09:35:57.316162145Z", + "output_generated": "2025-12-23T09:35:57.316392654Z", + "total_processing_time_ms": 93, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 92, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:57.223074962Z", + "publish_time": "2025-12-23T09:35:57.223314572Z", + "first_worker_start": "2025-12-23T09:35:57.22378009Z", + "last_worker_end": "2025-12-23T09:35:57.315304Z", + 
"total_journey_time_ms": 92, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:57.223921696Z", + "start_time": "2025-12-23T09:35:57.223998299Z", + "end_time": "2025-12-23T09:35:57.224092502Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:57.224113Z", + "start_time": "2025-12-23T09:35:57.224251Z", + "end_time": "2025-12-23T09:35:57.315304Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 91 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:57.223704787Z", + "start_time": "2025-12-23T09:35:57.22378009Z", + "end_time": "2025-12-23T09:35:57.223906695Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:57.223761489Z", + "start_time": "2025-12-23T09:35:57.223833792Z", + "end_time": "2025-12-23T09:35:57.223920696Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:57.223350373Z", + "publish_time": "2025-12-23T09:35:57.223409575Z", + "first_worker_start": "2025-12-23T09:35:57.223893795Z", + "last_worker_end": "2025-12-23T09:35:57.301524Z", + "total_journey_time_ms": 78, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:57.223938696Z", + "start_time": "2025-12-23T09:35:57.223994899Z", + "end_time": "2025-12-23T09:35:57.224056201Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:57.224131Z", + "start_time": "2025-12-23T09:35:57.224273Z", + "end_time": "2025-12-23T09:35:57.301524Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 77 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:57.223878294Z", + "start_time": 
"2025-12-23T09:35:57.223923496Z", + "end_time": "2025-12-23T09:35:57.223990198Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:57.223847493Z", + "start_time": "2025-12-23T09:35:57.223893795Z", + "end_time": "2025-12-23T09:35:57.223922296Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 168, + "min_processing_ms": 77, + "max_processing_ms": 91, + "avg_processing_ms": 84, + "median_processing_ms": 91, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3774, + "slowest_section_id": 0, + "slowest_section_time_ms": 92 + } +} diff --git a/data/output/0069a18b933fb6508d753a90fb11cafea2fce416.json b/data/output/0069a18b933fb6508d753a90fb11cafea2fce416.json new file mode 100644 index 0000000..66d05d1 --- /dev/null +++ b/data/output/0069a18b933fb6508d753a90fb11cafea2fce416.json @@ -0,0 +1,404 @@ +{ + "file_name": "0069a18b933fb6508d753a90fb11cafea2fce416.txt", + "total_words": 1117, + "top_n_words": [ + { + "word": 
"the", + "count": 67 + }, + { + "word": "to", + "count": 36 + }, + { + "word": "of", + "count": 24 + }, + { + "word": "s", + "count": 24 + }, + { + "word": "a", + "count": 20 + }, + { + "word": "with", + "count": 20 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "i", + "count": 17 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "is", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "That’s my job.", + "length": 16 + }, + { + "text": "majority see it that way.", + "length": 25 + }, + { + "text": "Hearts will be no different.", + "length": 28 + }, + { + "text": "‘I had a chat with Gordon.", + "length": 28 + }, + { + "text": "We’ll see how they deal with it.", + "length": 34 + }, + { + "text": "I think he has a chance of being in our squad!", + "length": 46 + }, + { + "text": "I spoke to him and he has thoroughly enjoyed it.", + "length": 48 + }, + { + "text": "On current form, Lewis is certainly one of them.", + "length": 48 + }, + { + "text": "‘We need to take three points and apply pressure.", + "length": 51 + }, + { + "text": "‘For me, this game is just as big as Hearts,’ he said.", + "length": 58 + }, + { + "text": "It’s a totally different ball game when the pressure is on.", + "length": 61 + }, + { + "text": "Of course it will put pressure on Hearts if we get it to within a point.", + "length": 72 + }, + { + "text": "‘I can understand people trying to rename stadiums for financial purposes.", + "length": 76 + }, + { + "text": "‘Obviously with Hearts kicking off after us, it gives us a chance to do that.", + "length": 79 + }, + { + "text": "‘I’m just glad we won’t get the opportunity to find that out,’ he said.", + "length": 79 + }, + { + "text": "'But if they don’t look for our result, then fair enough, that’s up to them.", + "length": 80 + }, + { + "text": "I still think in the grand scheme of things we have far more important league games.", + "length": 84 + }, + { + "text": "‘We are delighted he’s back in the squad because 
we want all our best players available.", + "length": 92 + }, + { + "text": "‘Lewis trained with us this morning and is in our squad for tomorrow,’ McCoist said on Friday.", + "length": 98 + }, + { + "text": "Macleod scored in Rangers' 4-0 win over Falkirk last Saturday and has been a key figure in the team .", + "length": 101 + }, + { + "text": "‘This will be the first time in the last couple of months where they might really feel the pressure.", + "length": 102 + }, + { + "text": "Lewis Macleod (front) trains with the Rangers squad on the eve of their Championship clash with Alloa .", + "length": 103 + }, + { + "text": "Macleod has impressed Scotland manager Gordon Strachan (background) following his first senior call-up .", + "length": 104 + }, + { + "text": "But to the vast majority of supporters it will still remain Ibrox no matter what you put in front of it.", + "length": 104 + }, + { + "text": "‘Those two gentlemen and myself know the importance of winning matches and getting out of the division.", + "length": 105 + }, + { + "text": "Mike Ashley will not be renaming Ibrox, which will come as a relief to the majority of Rangers supporters .", + "length": 107 + }, + { + "text": "Ally McCoist is delighted to have Macleod back for the clash, who has been released from international duty .", + "length": 109 + }, + { + "text": "Ally McCoist celebrates during Rangers' clash with Falkirk, with the manager keen to make promotion priority .", + "length": 110 + }, + { + "text": "‘It’s safe to say he is in the Scotland squad for a reason, and the reason is that he has been doing well.", + "length": 110 + }, + { + "text": "In a League Cup tie, Rangers will face Celtic for the first time since April 2012 when they lost 3-0 at Celtic Park .", + "length": 117 + }, + { + "text": "‘I would imagine he would join back up with the Scotland squad on the Sunday (ahead of Tuesday’s game with England).", + "length": 120 + }, + { + "text": "I’d be staggered if our boys didn’t look 
for the Hearts and Hibs results because that’s part and parcel of football.", + "length": 122 + }, + { + "text": "‘I’m still speaking with the chairman (David Somers) and (consultant) Derek Llambias and my job is to keep winning games.", + "length": 125 + }, + { + "text": "’ The next seven days promise to be pivotal as far as the race for the second tier’s one automatic promotion spot is concerned.", + "length": 131 + }, + { + "text": "He has been pleased with Lewis and his contribution, but thankfully he’s in agreement that we’d like him to get a game at the weekend.", + "length": 138 + }, + { + "text": "‘I am not underselling the Celtic game at all or the transfer window, but I would rather strengthen to win the league than strengthen to beat Celtic.", + "length": 151 + }, + { + "text": "’ While Mike Ashley’s ambitions remain under wraps, one thing the Newcastle owner will not be doing is exercising the naming right he acquired to Ibrox.", + "length": 156 + }, + { + "text": "‘Don’t think for a minute I am undermining the Celtic game, I am trying to totally focus on the most important thing which is trying to get out of the league.", + "length": 162 + }, + { + "text": "‘People will tell you they don’t look at papers or their closest contenders but the first thing you do after a game is look to see how your opposition have done.", + "length": 165 + }, + { + "text": "McCoist fully understands the commercial pressures clubs are under and yet, as a lifelong Rangers fan, he was relieved to learn the name of Ibrox will remain unchanged.", + "length": 168 + }, + { + "text": "Ally McCoist expressed his delight at having Lewis Macleod available to face Alloa on Saturday– after Gordon Strachan agreed to temporarily excuse the player from Scotland duty.", + "length": 179 + }, + { + "text": "And the Ibrox manager feels having Macleod in his ranks heightens his side’s chances of ramping up the pressure on Robbie Neilson’s team ahead of next week’s capital showdown.", + 
"length": 181 + }, + { + "text": "Despite having bought them for £1m from Charles Green, Ashley this week confirmed he had torn up that deal – a move that’s undoubtedly popular with the vast majority of supporters.", + "length": 185 + }, + { + "text": "As if the Old Firm game doesn’t have enough sub-plots already, the fact it’s due to take place immediately after the closure of the January transfer window is another intriguing aspect.", + "length": 189 + }, + { + "text": "With Hearts’ lead at the top of the table just four points, McCoist’s men have the chance to briefly cut that to a single point by virtue of the Gorgie side’s late kick-off at Falkirk.", + "length": 190 + }, + { + "text": "’ Next week’s contest may briefly take the limelight away from the League Cup semi-final with Celtic in early February but a first meeting with the Parkhead men in three years is likely to hog the agenda thereafter.", + "length": 219 + }, + { + "text": "But while acknowledging many minds are already drifting towards next weekend’s first-versus-second clash at Tynecastle, McCoist sees Saturday's scheduling as a prime chance to crank up the pressure on the league leaders.", + "length": 222 + }, + { + "text": "But McCoist insists his New Year wish list - in terms of players coming and going - will be motivated solely by his club’s long-term recovery as opposed to the chance to record a one-off famous victory over their ancient rivals.", + "length": 230 + }, + { + "text": "’ With eight wins on the bounce and six clean sheets to boot, defender Darren McGregor believes a resounding win against Barry Smith’s men today will send out a message to Hearts – before they play Falkirk and ahead of next week’s showdown.", + "length": 248 + }, + { + "text": "Speaking after rival Neilson insisted this week that he doesn’t look out for Rangers’ results, McCoist said: ‘I can’t speak for the Hearts players, but I can tell you - because there is no point lying - that I look for the Hearts result, 
the Hibs result, every result.", + "length": 276 + }, + { + "text": "With Rangers’ Championship match with the Wasps going ahead in the middle of an international break, McCoist asked the national team boss to temporarily ‘loan’ the midfielder back to him – provided he wasn’t central to his plans for last night’s game with Ireland.", + "length": 276 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.3558119237422943 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:57.724179687Z", + "first_section_created": "2025-12-23T09:35:57.724553502Z", + "last_section_published": "2025-12-23T09:35:57.72499932Z", + "all_results_received": "2025-12-23T09:35:57.804271656Z", + "output_generated": "2025-12-23T09:35:57.804434462Z", + "total_processing_time_ms": 80, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 79, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:57.724553502Z", + "publish_time": "2025-12-23T09:35:57.724854414Z", + "first_worker_start": "2025-12-23T09:35:57.725323433Z", + "last_worker_end": "2025-12-23T09:35:57.803478Z", + "total_journey_time_ms": 78, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:57.72550634Z", + "start_time": "2025-12-23T09:35:57.725577643Z", + "end_time": "2025-12-23T09:35:57.725678747Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:57.726066Z", + "start_time": "2025-12-23T09:35:57.726209Z", + "end_time": "2025-12-23T09:35:57.803478Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 77 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:57.725321832Z", + "start_time": "2025-12-23T09:35:57.725388335Z", + "end_time": "2025-12-23T09:35:57.72551354Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { 
+ "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:57.72525453Z", + "start_time": "2025-12-23T09:35:57.725323433Z", + "end_time": "2025-12-23T09:35:57.725367934Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:35:57.724943017Z", + "publish_time": "2025-12-23T09:35:57.72499932Z", + "first_worker_start": "2025-12-23T09:35:57.725553942Z", + "last_worker_end": "2025-12-23T09:35:57.798951Z", + "total_journey_time_ms": 74, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:57.72550624Z", + "start_time": "2025-12-23T09:35:57.725578843Z", + "end_time": "2025-12-23T09:35:57.725605644Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:57.725839Z", + "start_time": "2025-12-23T09:35:57.726052Z", + "end_time": "2025-12-23T09:35:57.798951Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:57.725714948Z", + "start_time": "2025-12-23T09:35:57.72576275Z", + "end_time": "2025-12-23T09:35:57.725800551Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:57.72550924Z", + "start_time": "2025-12-23T09:35:57.725553942Z", + "end_time": "2025-12-23T09:35:57.725567642Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 149, + "min_processing_ms": 72, + "max_processing_ms": 77, + 
"avg_processing_ms": 74, + "median_processing_ms": 77, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3112, + "slowest_section_id": 0, + "slowest_section_time_ms": 78 + } +} diff --git a/data/output/0069a5a18a299389ea9fa6d48ea308056fe08e13.json b/data/output/0069a5a18a299389ea9fa6d48ea308056fe08e13.json new file mode 100644 index 0000000..e725951 --- /dev/null +++ b/data/output/0069a5a18a299389ea9fa6d48ea308056fe08e13.json @@ -0,0 +1,302 @@ +{ + "file_name": "0069a5a18a299389ea9fa6d48ea308056fe08e13.txt", + "total_words": 542, + "top_n_words": [ + { + "word": "the", + "count": 53 + }, + { + "word": "in", + "count": 20 + }, + { + "word": "of", + "count": 19 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "population", + "count": 10 + }, + { + "word": "2008", + "count": 9 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "immigrants", + "count": 7 + }, + { + "word": "states", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "5 percent in 2007.", + "length": 18 + }, + { + "text": "9 million in 2007.", + "length": 18 + }, + { + "text": "6 percent in 2007.", + "length": 18 + }, + { + "text": "Census Bureau show.", + "length": 19 + }, + { + "text": "There were about 21.", + "length": 
20 + }, + { + "text": "The number of naturalized citizens in the U.", + "length": 44 + }, + { + "text": "6 million noncitizens in 2008, down from 21.", + "length": 44 + }, + { + "text": "foreign-born population represented about 12.", + "length": 45 + }, + { + "text": "5 percent of the population in 2008, down from 12.", + "length": 50 + }, + { + "text": "According to the Census Bureau's American Community Survey, the U.", + "length": 66 + }, + { + "text": "increased, partly attributed to voter drives for the 2008 election.", + "length": 67 + }, + { + "text": "The label noncitizens includes both legal residents and illegal immigrants.", + "length": 75 + }, + { + "text": "\"Naturalizations grew at a record pace between 2006 and 2008, with a total of 2.", + "length": 80 + }, + { + "text": "4 million immigrants becoming new citizens in the United States,\" according to a DHS statement.", + "length": 95 + }, + { + "text": "\"The recession has had a significant effect on immigrants' decisions on whether to come to the U.", + "length": 97 + }, + { + "text": "But given the steep upward trend in the foreign-born population since 1970, no change is big news.", + "length": 98 + }, + { + "text": "Mittelstadt noted, however, that those immigrants already in the United States appear to be staying.", + "length": 100 + }, + { + "text": "Taking into account the margin of error, it was possible that the immigrant population remained even.", + "length": 101 + }, + { + "text": ",\" said Michelle Mittelstadt, director of communications at the nonpartisan Migration Policy Institute.", + "length": 103 + }, + { + "text": "The Census survey matches reports from the Department of Homeland Security on the rise of naturalization applications.", + "length": 118 + }, + { + "text": "The Mexican-born population in the United States dropped by about 300,000 between 2007 and 2008, according to census data.", + "length": 122 + }, + { + "text": "The number of individuals who are naturalized citizens 
increased to 43 percent of the foreign-born population in 2008 from 42.", + "length": 126 + }, + { + "text": "Along with the decline in the noncitizen population, however, there was a notable increase in the number of naturalized citizens, Grieco said.", + "length": 142 + }, + { + "text": "\"Between '07 and '08 there really wasn't that much of a change,\" said Elizabeth Grieco, chief of immigration statistics staff at the Census Bureau.", + "length": 147 + }, + { + "text": "The largest declines in the foreign-born population were in states that were hardest hit by the recession, including California, Florida and Arizona.", + "length": 149 + }, + { + "text": "Would-be unauthorized immigrants and legal temporary workers are mostly the ones who have decided to stay put in their home countries for now, Mittelstadt said.", + "length": 160 + }, + { + "text": "The survey doesn't give a reason for the leveling off, but experts pointed to the economic downturn and the resulting high unemployment as factors behind the shift.", + "length": 164 + }, + { + "text": "The new Census statistics show that for the first time since the American Community Survey was fully implemented in 2005, the number of noncitizens decreased, Grieco said.", + "length": 171 + }, + { + "text": "The American Community Survey collects data from about 3 million addresses each year, and provides one of the most complete pictures of the population, according to the bureau.", + "length": 176 + }, + { + "text": "(CNN) -- After nearly 40 years of recorded increases, the number of immigrants living in the United States remained flat between 2007 and 2008, recent statistics released by the U.", + "length": 180 + }, + { + "text": "A significant fee increase imposed in 2007 for naturalization applications and an awareness of citizenship brought on during voter registration drives for the 2008 election help explain the increase, Mittelstadt said.", + "length": 217 + }, + { + "text": "A recent study by the Pew 
Hispanic Center concluded that emigration from Mexico, the largest source of immigrants to the United States, slowed at least 40 percent between mid-decade and 2008, based on national population surveys in the United States and Mexico, as well as Border Patrol apprehension figures.", + "length": 308 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.7926867008209229 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:58.225757031Z", + "first_section_created": "2025-12-23T09:35:58.227854414Z", + "last_section_published": "2025-12-23T09:35:58.228023121Z", + "all_results_received": "2025-12-23T09:35:58.287380669Z", + "output_generated": "2025-12-23T09:35:58.287540075Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:58.227854414Z", + "publish_time": "2025-12-23T09:35:58.228023121Z", + "first_worker_start": "2025-12-23T09:35:58.228573243Z", + "last_worker_end": "2025-12-23T09:35:58.286484Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:58.228581843Z", + "start_time": "2025-12-23T09:35:58.228649046Z", + "end_time": "2025-12-23T09:35:58.228713048Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:58.228783Z", + "start_time": "2025-12-23T09:35:58.228923Z", + "end_time": "2025-12-23T09:35:58.286484Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:58.228570642Z", + "start_time": "2025-12-23T09:35:58.228634345Z", + "end_time": "2025-12-23T09:35:58.228725849Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": 
"2025-12-23T09:35:58.22850404Z", + "start_time": "2025-12-23T09:35:58.228573243Z", + "end_time": "2025-12-23T09:35:58.228600744Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3370, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/0069b6ec1df96ce330df825e7d7bbba1fc609986.json b/data/output/0069b6ec1df96ce330df825e7d7bbba1fc609986.json new file mode 100644 index 0000000..12f678e --- /dev/null +++ b/data/output/0069b6ec1df96ce330df825e7d7bbba1fc609986.json @@ -0,0 +1,282 @@ +{ + "file_name": "0069b6ec1df96ce330df825e7d7bbba1fc609986.txt", + "total_words": 624, + "top_n_words": [ + { + "word": "the", + "count": 27 + }, + { + "word": "in", + "count": 23 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "a", + "count": 16 + }, + { + "word": "was", + "count": 16 + }, + { + "word": "gholston", 
+ "count": 15 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "her", + "count": 13 + }, + { + "word": "of", + "count": 13 + }, + { + "word": "s", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "21, according to WISHTV.", + "length": 25 + }, + { + "text": "A coroner determined that Allen was strangled to death.", + "length": 55 + }, + { + "text": "'It was the DNA match in the database which brought Mr.", + "length": 55 + }, + { + "text": "Snatched: Allen was snatched from her sister's porch on Aug.", + "length": 60 + }, + { + "text": "21, Gholston is then believed to have taken her to an abandon house .", + "length": 69 + }, + { + "text": "Since the teen's murder, her family has been eager to find her killer.", + "length": 70 + }, + { + "text": "Investigators say Gholston snatched Allen from her sister's porch early on Aug.", + "length": 79 + }, + { + "text": "Family: Since the teen's murder, her family has been eager to find her killer.", + "length": 79 + }, + { + "text": "Gholston's name to the attention of the detectives,' he said during a news conference.", + "length": 86 + }, + { + "text": "She was taken to an abandoned house then carried across a busy street before being burned and buried.", + "length": 101 + }, + { + "text": "Edna Wilson was found strangled to death in October and Loy Ofsthun and Selese Goss were murdered in June.", + "length": 106 + }, + { + "text": "The teen's purse and shoes were discovered about a block from where her body was found, according to police.", + "length": 108 + }, + { + "text": "Badly Burned: Later in the day after her abduction, Allen's body was found badly burned behind a house nearby .", + "length": 111 + }, + { + "text": "Curry said Gholston was arrested in the northeastern Indiana city of Bluffton this month for a parole violation.", + "length": 112 + }, + { + "text": "Several of Allen's relatives and friends held hands and prayed in a circle after the news conference announcing the charges.", + 
"length": 124 + }, + { + "text": "WISHTV reports that Gholston faced murder charges in 2002, but the charges were dropped when a key witness changed his story.", + "length": 125 + }, + { + "text": "They even offered a $3,000 reward at the beginning of the month to anyone with information leading to an arrest in the case .", + "length": 125 + }, + { + "text": "Gholston told police investigators that he wasn't in Indianapolis in August and knew nothing about Allen's death, Curry said.", + "length": 125 + }, + { + "text": "Investigators don't know of any connection between Allen and Gholston, who wasn't initially a suspect in the slaying, Curry said.", + "length": 129 + }, + { + "text": "He previously served prison sentences for armed robbery and drug possession, according to Indiana Department of Correction records.", + "length": 131 + }, + { + "text": "Court records didn't list an attorney for Gholston and an initial court hearing on the murder charge wasn't immediately scheduled Friday.", + "length": 137 + }, + { + "text": "They even offered a $3,000 reward at the beginning of the month to anyone with information leading to an arrest in the case, IndyStar reports.", + "length": 142 + }, + { + "text": "Gholston was released from prison in May after serving a sentence for 2006 convictions on illegal gun possession and resisting arrest charges.", + "length": 142 + }, + { + "text": "The address Gholston gave for his parole was near where her body was found, but it wasn't clear when or whether he ever lived in the neighborhood..", + "length": 147 + }, + { + "text": "Three: Police are now investigating whether Gholston is connected to the murders of three other women who were strangled to death in Indianapolis .", + "length": 147 + }, + { + "text": "Police are now investigating whether Gholston is connected to the murders of three other women who were strangled to death in Indianapolis, WISHTV reports.", + "length": 155 + }, + { + "text": "Conference: In a news 
conference,  Marion County Prosecutor Terry Curry said that DNA found on Allen's sandals and body matched body samples from Gholston .", + "length": 157 + }, + { + "text": "Her body was found later that day by a man walking his dog, WISHTV reports, her body was badly burned, which police believed was done to get rid of any DNA evidence.", + "length": 165 + }, + { + "text": "DNA found on Dominique Allen's sandals and body matched samples from 46-year-old William Gholston in the state inmate database, Marion County Prosecutor Terry Curry said.", + "length": 170 + }, + { + "text": "Charged: William Gholston, 46,  of Indianapolis has been charged in the killing of 15-year-old Dominique Allen whose badly burned body was found in an Indianapolis backyard .", + "length": 177 + }, + { + "text": "An ex-convict has been charged in the killing of a 15-year-old girl after DNA tests linked him to her badly burned body that was found in an Indianapolis backyard in August, a prosecutor said Friday.", + "length": 199 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5643612146377563 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:58.728771732Z", + "first_section_created": "2025-12-23T09:35:58.730027982Z", + "last_section_published": "2025-12-23T09:35:58.730203689Z", + "all_results_received": "2025-12-23T09:35:58.798216079Z", + "output_generated": "2025-12-23T09:35:58.798352385Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:58.730027982Z", + "publish_time": "2025-12-23T09:35:58.730203689Z", + "first_worker_start": "2025-12-23T09:35:58.730703008Z", + "last_worker_end": "2025-12-23T09:35:58.796049Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": 
"2025-12-23T09:35:58.730716509Z", + "start_time": "2025-12-23T09:35:58.730784011Z", + "end_time": "2025-12-23T09:35:58.730903616Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:58.730912Z", + "start_time": "2025-12-23T09:35:58.731049Z", + "end_time": "2025-12-23T09:35:58.796049Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:58.730707508Z", + "start_time": "2025-12-23T09:35:58.730779911Z", + "end_time": "2025-12-23T09:35:58.730877415Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:58.730654706Z", + "start_time": "2025-12-23T09:35:58.730703008Z", + "end_time": "2025-12-23T09:35:58.73073761Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3643, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/0069d09ef89aef4075e0ade34fef0b7cec23c62a.json b/data/output/0069d09ef89aef4075e0ade34fef0b7cec23c62a.json new file mode 100644 index 0000000..b2556c0 --- /dev/null +++ b/data/output/0069d09ef89aef4075e0ade34fef0b7cec23c62a.json @@ -0,0 +1,242 @@ +{ + "file_name": "0069d09ef89aef4075e0ade34fef0b7cec23c62a.txt", + "total_words": 458, + "top_n_words": [ + { + "word": "the", + "count": 30 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "polar", + "count": 9 + }, + { + "word": "at", + "count": 8 + }, + { + "word": "bears", + "count": 8 + }, + { + "word": "into", + "count": 8 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "they", + "count": 8 + }, + { + "word": "and", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "'You .", + "length": 6 + }, + { + "text": "generally having fun together...", + "length": 32 + }, + { + "text": "Polar bears love swimming but they also .", + "length": 41 + }, + { + "text": "see the mother teaching her cubs how it's done, but they are just .", + "length": 67 + }, + { + "text": "A short time later they were confident enough to give it a go themselves .", + "length": 74 + }, + { + "text": "like to play and jump into the water, so there was a lot of spray,' she added.", + "length": 78 + }, + { + "text": "'This is a very happy family of polar bears learning to dive,' the photographer said.", + "length": 85 + }, + { + "text": "This is the adorable moment a mother polar bear teaches her young cubs the art of diving.", + "length": 89 + }, + { + "text": "The polar bear is often regarded as a marine mammal because it spends many months of the year at sea.", + "length": 101 + }, + { + "text": "One by one the young bears leap into the water - nervously at first, but increasingly confident the more they 
practice .", + "length": 120 + }, + { + "text": "Incoming: Photographer Natalia Nazarova said: 'A lot of people who see my photographs are surprised to see the unusual situation...", + "length": 131 + }, + { + "text": "Practice makes perfect: The young polar bear cubs (right) spent time watching their mother (left) diving into the pool at Moscow Zoo.", + "length": 133 + }, + { + "text": "In general they live relatively unsociable lives, but they are occasionally spotted playing in the wild - sometimes for hours at a time.", + "length": 136 + }, + { + "text": "The pictures were taken by Russian photographer Natalia Nazarova, 63, as she watched the young bears practicing their diving at Moscow Zoo.", + "length": 139 + }, + { + "text": "'Look at me, mummy': Having watched their mother diving into the pool at Moscow Zoo, the adorable polar bear cubs decide to follow her lead.", + "length": 140 + }, + { + "text": "Jump: Polar bears are strong and hardy animals, but their habitat is threatened, so it is considered important to maintain a captive population.", + "length": 144 + }, + { + "text": "This year Moscow Zoo is expecting a new generation of polar bears to be born, so visitors will have a chance to see diving bears long into the future .", + "length": 151 + }, + { + "text": "Leap of faith: The pictures were taken by Russian photographer Natalia Nazarova, 63, as she watched the young bears practicing their diving at Moscow Zoo .", + "length": 155 + }, + { + "text": "Naughty: The diving lesson appears to get too much for one cub who decides he'd rather get involved in a play fight - sneaking up behind his unwitting brother and pushing him into the pool .", + "length": 190 + }, + { + "text": "One by one, the cubs follow their mother's lead and throw themselves into the water - although the lesson appears to get too much for one cub who decides he'd rather get involved in a play fight - sneaking up behind his unwitting brother and pushing him into the pool.", + 
"length": 268 + }, + { + "text": "They love the joy of seeing these strong and healthy animals so happy, as they don't expect them to be like this in a zoo' Shake it off: Having enjoyed her time in the pool, the mother polar bear decides it's time to dry off - shaking her huge head and sending water flying everywhere .", + "length": 286 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.41000452637672424 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:59.23045998Z", + "first_section_created": "2025-12-23T09:35:59.230752892Z", + "last_section_published": "2025-12-23T09:35:59.230930899Z", + "all_results_received": "2025-12-23T09:35:59.292203623Z", + "output_generated": "2025-12-23T09:35:59.292352929Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:59.230752892Z", + "publish_time": "2025-12-23T09:35:59.230930899Z", + "first_worker_start": "2025-12-23T09:35:59.231418818Z", + "last_worker_end": "2025-12-23T09:35:59.291311Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:59.231385017Z", + "start_time": "2025-12-23T09:35:59.231446119Z", + "end_time": "2025-12-23T09:35:59.231492421Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:59.231629Z", + "start_time": "2025-12-23T09:35:59.231758Z", + "end_time": "2025-12-23T09:35:59.291311Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:59.231387617Z", + "start_time": "2025-12-23T09:35:59.231453519Z", + "end_time": "2025-12-23T09:35:59.231513022Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + 
"worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:59.231368516Z", + "start_time": "2025-12-23T09:35:59.231418818Z", + "end_time": "2025-12-23T09:35:59.231441119Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2586, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/0069dcea43143b915e14957098606f36ae5b198b.json b/data/output/0069dcea43143b915e14957098606f36ae5b198b.json new file mode 100644 index 0000000..1351279 --- /dev/null +++ b/data/output/0069dcea43143b915e14957098606f36ae5b198b.json @@ -0,0 +1,418 @@ +{ + "file_name": "0069dcea43143b915e14957098606f36ae5b198b.txt", + "total_words": 590, + "top_n_words": [ + { + "word": "the", + "count": 29 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "fashion", + "count": 15 + }, + { + "word": "s", + "count": 15 + }, + { + "word": 
"for", + "count": 12 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "armani", + "count": 11 + }, + { + "word": "he", + "count": 11 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "is", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "In .", + "length": 4 + }, + { + "text": "Ms .", + "length": 4 + }, + { + "text": "said.", + "length": 5 + }, + { + "text": "issues.", + "length": 7 + }, + { + "text": "reason.", + "length": 7 + }, + { + "text": "Mario .", + "length": 7 + }, + { + "text": "’ Mr .", + "length": 8 + }, + { + "text": "’ he .", + "length": 8 + }, + { + "text": "’ Mr .", + "length": 8 + }, + { + "text": "‘She .", + "length": 8 + }, + { + "text": "However, .", + "length": 10 + }, + { + "text": "Currently, .", + "length": 12 + }, + { + "text": "until the end.", + "length": 14 + }, + { + "text": "‘Unfortunately, .", + "length": 19 + }, + { + "text": "A spokesperson for .", + "length": 20 + }, + { + "text": "beginning of another.", + "length": 21 + }, + { + "text": "‘We met Mr Armani .", + "length": 21 + }, + { + "text": "last day,’ he said.", + "length": 21 + }, + { + "text": "York to publish Vogue.", + "length": 22 + }, + { + "text": "'It's not professional.", + "length": 23 + }, + { + "text": "said she was sending her people.", + "length": 32 + }, + { + "text": "earlier and he is totally fine now.", + "length": 35 + }, + { + "text": "world’s other major fashion weeks.", + "length": 36 + }, + { + "text": "In a certain way, it’s right that .", + "length": 37 + }, + { + "text": "But if you go to see your dentist and .", + "length": 39 + }, + { + "text": "But, perhaps, I’m influential as well.", + "length": 40 + }, + { + "text": "for March 5 – the event’s final day.", + "length": 40 + }, + { + "text": "some shows that Anna is not able to attend.", + "length": 43 + }, + { + "text": "But the designer was not impressed by this.", + "length": 43 + }, + { + "text": "He added: ‘She is influential 
and powerful.", + "length": 45 + }, + { + "text": "But currently this is an empty day,’ he said.", + "length": 47 + }, + { + "text": "and everything he has done for Italy and fashion worldwide.", + "length": 59 + }, + { + "text": "Armani says Ms Wintour assured him that other high-ranking .", + "length": 60 + }, + { + "text": "Sorry we are not currently accepting comments on this article.", + "length": 62 + }, + { + "text": "She also often leaves Paris Fashion Week early to return to New .", + "length": 65 + }, + { + "text": "twelve hours between the end of one city's fashion week and the .", + "length": 65 + }, + { + "text": "he puts you in the hands of his assistant, what’s your reaction?", + "length": 66 + }, + { + "text": "spoke with Mr Armani after his press conference to resolve their .", + "length": 66 + }, + { + "text": "Wintour has been known to leave Milan Fashion Week early for this .", + "length": 67 + }, + { + "text": "the important journalists stay until the end of the week, and next .", + "length": 68 + }, + { + "text": "representatives from her publication attended the show in her place.", + "length": 68 + }, + { + "text": "prestigious schedule slot that he has held for the past two decades.", + "length": 68 + }, + { + "text": "there is an overlap, giving fashion editors and buyers a maximum of .", + "length": 69 + }, + { + "text": "Armani blames Ms Wintour’s absence on the fact that he is the only .", + "length": 70 + }, + { + "text": "della Moda’s refusal to coordinate its fashion week dates with the .", + "length": 70 + }, + { + "text": "Week calendar needs to be altered to maintain high-profile designers .", + "length": 70 + }, + { + "text": "season, we will try to guarantee the presence of other big brands the .", + "length": 71 + }, + { + "text": "with the fashion calendar now running for more than a month, there are .", + "length": 72 + }, + { + "text": "told reporters backstage after his preview show that the Milan Fashion .", + "length": 72 
+ }, + { + "text": "‘When we decided to show the last day, other big brands were involved.", + "length": 72 + }, + { + "text": "Boselli, the Camera della Moda president told WWD that his organization .", + "length": 73 + }, + { + "text": "Vogue told WWD that ‘Anna has the greatest respect for Giorgio Armani .", + "length": 73 + }, + { + "text": "the last few years Mr Armani has been an outspoken critic of the Camera .", + "length": 73 + }, + { + "text": "Nicolas Ghesquière’s debut show for Louis Vuitton, which is scheduled .", + "length": 74 + }, + { + "text": "this season she may stay for Paris’s entire duration in order to catch .", + "length": 74 + }, + { + "text": "influential designer to show on the final day of Milan Fashion Week – a .", + "length": 75 + }, + { + "text": "Vogue editor after his fashion show on Monday - Milan Fashion Week's final day.", + "length": 79 + }, + { + "text": "Mr Armani is so furious that he held a press conference to openly denounce the U.", + "length": 81 + }, + { + "text": "Mr Armani explained that the Camera della Moda, which presides over the schedule, is at fault.", + "length": 94 + }, + { + "text": "What she missed: Mr Armani's collection exhibited his signature for tailoring, fabricated in a primary wash of grey and green .", + "length": 127 + }, + { + "text": "Giorgio Armani has called Anna Wintour 'unprofessional' for skipping his fall 2014 show in favor of flying on to Paris Fashion Week.", + "length": 132 + }, + { + "text": "‘She took an airplane, dumped [me] and went to Paris…’ said the designer, 79, whose brand is a major advertiser in Ms Wintour's magazine.", + "length": 143 + }, + { + "text": "’ Unhappy: Giorgio Armani (pictured taking a final bow at his fashion show on Monday) has spoken out against Anna Wintour's absence at his show .", + "length": 147 + }, + { + "text": "Around town: Ms Wintour had been spotted in the front row of other Milan Fashion Week shows before leaving for Paris, including Versace (left) 
and Aquilano Rimondi (right) He .", + "length": 176 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6710435748100281 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:35:59.731706111Z", + "first_section_created": "2025-12-23T09:35:59.733833895Z", + "last_section_published": "2025-12-23T09:35:59.734016002Z", + "all_results_received": "2025-12-23T09:35:59.800645938Z", + "output_generated": "2025-12-23T09:35:59.800817445Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:35:59.733833895Z", + "publish_time": "2025-12-23T09:35:59.734016002Z", + "first_worker_start": "2025-12-23T09:35:59.734503122Z", + "last_worker_end": "2025-12-23T09:35:59.799782Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:35:59.734549323Z", + "start_time": "2025-12-23T09:35:59.734624226Z", + "end_time": "2025-12-23T09:35:59.73471493Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:35:59.734727Z", + "start_time": "2025-12-23T09:35:59.734876Z", + "end_time": "2025-12-23T09:35:59.799782Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:35:59.73446472Z", + "start_time": "2025-12-23T09:35:59.734535623Z", + "end_time": "2025-12-23T09:35:59.734606326Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:35:59.734441919Z", + "start_time": "2025-12-23T09:35:59.734503122Z", + "end_time": "2025-12-23T09:35:59.734534423Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + 
"worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3448, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/006a092019f7c6c67088ea134ced1b1a7f544197.json b/data/output/006a092019f7c6c67088ea134ced1b1a7f544197.json new file mode 100644 index 0000000..ff39752 --- /dev/null +++ b/data/output/006a092019f7c6c67088ea134ced1b1a7f544197.json @@ -0,0 +1,278 @@ +{ + "file_name": "006a092019f7c6c67088ea134ced1b1a7f544197.txt", + "total_words": 594, + "top_n_words": [ + { + "word": "the", + "count": 38 + }, + { + "word": "in", + "count": 19 + }, + { + "word": "it", + "count": 16 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "we", + "count": 14 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "rangers", + "count": 11 + }, + { + "word": "celtic", + "count": 10 + }, + { + "word": "win", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "We will be up for it.", 
+ "length": 21 + }, + { + "text": "That’s what we want.", + "length": 22 + }, + { + "text": "‘We want to win the League Cup.", + "length": 33 + }, + { + "text": "Hopefully it is going to be a great day.", + "length": 40 + }, + { + "text": "‘We want to win the Treble this season.", + "length": 41 + }, + { + "text": "‘The semi-final is going to be an amazing game.", + "length": 49 + }, + { + "text": "But of course it makes it more special that we play Rangers.", + "length": 60 + }, + { + "text": "‘Winning it is the most important thing - it doesn’t matter how.", + "length": 68 + }, + { + "text": "‘It means a lot to everyone here,’ said the 23-year-old defender.", + "length": 69 + }, + { + "text": "If we beat Rangers, we will be in the final of the first trophy we can win.", + "length": 75 + }, + { + "text": "I knew we could always meet them in a cup, but I didn’t think about it a lot.", + "length": 79 + }, + { + "text": "I wasn’t really expecting to play games against Rangers when I signed for Celtic.", + "length": 83 + }, + { + "text": "What John said was true, it doesn’t matter who we play in the tournament, we need to win.", + "length": 91 + }, + { + "text": "We have a lot of games before that one comes around, though, so we need to focus on them first.", + "length": 95 + }, + { + "text": "‘Everyone wants to experience this game - and win it, of course, which is the most important thing.", + "length": 101 + }, + { + "text": "Virgil van Dijk cannot wait to face Rangers in the League Cup semi-final at Hampden Park for Celtic .", + "length": 101 + }, + { + "text": "Ally McCoist's Rangers, in action against Hearts, are currently playing in the Scottish Championship .", + "length": 102 + }, + { + "text": "Celtic players celebrate their impressive 6-0 win over Partick Thistle in the League Cup on Wednesday .", + "length": 103 + }, + { + "text": "‘But of course you want to play in that fixture because it is unbelievable, with everything around it.", + "length": 104 + }, 
+ { + "text": "’ Gary Hooper completed the scoring during Celtic's 3-0 win over Rangers in the last Old Firm derby in 2012 .", + "length": 111 + }, + { + "text": "Celtic players celebrate during their 3-0 win over rivals Rangers back in April 2012 in the Scottish Premiership .", + "length": 114 + }, + { + "text": "‘Yeah, it would make it more special if we beat Rangers on the way to winning the League Cup,’ the Dutchman said.", + "length": 117 + }, + { + "text": "Virgil van Dijk insists that defeating Rangers would make a potential Treble taste even sweeter for Celtic this season.", + "length": 119 + }, + { + "text": "Ronny Deila’s side will undoubtedly start favourites for the hotly-anticipated clash, with Rangers still off the pace in the Championship race.", + "length": 145 + }, + { + "text": "The Glasgow giants will lock horns for the first time in almost three years when they descend on Hampden on January 31 or February 1 for the League Cup semi-final.", + "length": 163 + }, + { + "text": "John Guidetti, Saturday’s match-winner, has already stated that whoever Celtic landed in the draw would be ‘going down’ - a view subsequently endorsed by van Dijk.", + "length": 169 + }, + { + "text": "It’s the first time the teams will have met since Celtic won 3-0 at Parkhead on 29 April 2012 - with Rangers’ subsequent financial meltdown putting the fixture into cold storage for two-and-a-half seasons.", + "length": 209 + }, + { + "text": "And, having helped Celtic to a fifth successive win against Inverness on Saturday, Van Dijk believes defeating the Ibrox club en route to the first showpiece occasion of the season would lend ever greater kudos to the prospect of a domestic clean sweep.", + "length": 253 + }, + { + "text": "’ Despite having been in four knock-out competitions since 2012, the Glasgow giants had been kept apart until Saturday’s live TV draw paired them together, with holders Aberdeen and high-flying Dundee United fighting it out in the other half of the 
draw.", + "length": 258 + }, + { + "text": "Celtic boss Deila made no secret of his delight at being handed the prospect of experiencing an Old Firm game in his first season at Parkhead - with a loud cheer from the dressing room on Saturday leaving no one in any doubt as to the feelings of his players.", + "length": 259 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.47428417205810547 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:00.234775314Z", + "first_section_created": "2025-12-23T09:36:00.235092926Z", + "last_section_published": "2025-12-23T09:36:00.235250533Z", + "all_results_received": "2025-12-23T09:36:00.302258384Z", + "output_generated": "2025-12-23T09:36:00.302437391Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:00.235092926Z", + "publish_time": "2025-12-23T09:36:00.235250533Z", + "first_worker_start": "2025-12-23T09:36:00.235903758Z", + "last_worker_end": "2025-12-23T09:36:00.30132Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:00.235814955Z", + "start_time": "2025-12-23T09:36:00.23594796Z", + "end_time": "2025-12-23T09:36:00.236009663Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:00.236098Z", + "start_time": "2025-12-23T09:36:00.236287Z", + "end_time": "2025-12-23T09:36:00.30132Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:00.235848556Z", + "start_time": "2025-12-23T09:36:00.235903758Z", + "end_time": "2025-12-23T09:36:00.235975361Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", 
+ "receive_time": "2025-12-23T09:36:00.235843756Z", + "start_time": "2025-12-23T09:36:00.235959061Z", + "end_time": "2025-12-23T09:36:00.235995962Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3281, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/006a1eaaddfd378748841ac488f35f3133a84c3b.json b/data/output/006a1eaaddfd378748841ac488f35f3133a84c3b.json new file mode 100644 index 0000000..2cc9656 --- /dev/null +++ b/data/output/006a1eaaddfd378748841ac488f35f3133a84c3b.json @@ -0,0 +1,314 @@ +{ + "file_name": "006a1eaaddfd378748841ac488f35f3133a84c3b.txt", + "total_words": 700, + "top_n_words": [ + { + "word": "the", + "count": 44 + }, + { + "word": "of", + "count": 25 + }, + { + "word": "to", + "count": 22 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "and", + "count": 13 + }, + { + 
"word": "be", + "count": 12 + }, + { + "word": "he", + "count": 12 + }, + { + "word": "it", + "count": 12 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "said", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "Why?", + "length": 4 + }, + { + "text": ",' he asked.", + "length": 12 + }, + { + "text": "'I’m tired.", + "length": 13 + }, + { + "text": "Companies will be united.", + "length": 25 + }, + { + "text": "Everything will be united.", + "length": 26 + }, + { + "text": "'I am too old already,' he said.", + "length": 32 + }, + { + "text": "Until we do, the wound won’t heal.", + "length": 36 + }, + { + "text": "'These are old concepts that would remain only on maps.", + "length": 55 + }, + { + "text": "The shapes are freedom, democracy, free market, pluralism.", + "length": 58 + }, + { + "text": "'We destroyed the borders, globalisation is on the horizon.", + "length": 59 + }, + { + "text": "' Of  building new structures, he said: 'It’s Lego bricks.", + "length": 61 + }, + { + "text": "'We need to have similar shapes of those Lego bricks,' he said.", + "length": 63 + }, + { + "text": "'Tomorrow there will be no division to Europe and Asia,' he said.", + "length": 65 + }, + { + "text": "Those who have bad parameters would not be able to unite with anyone.", + "length": 69 + }, + { + "text": "'If Russia gets the right shapes, it will be possible to work with it.", + "length": 70 + }, + { + "text": "IT is a process of structures growing due to the technological progress.", + "length": 72 + }, + { + "text": "'Because after the war, Germany fully confessed to all its dirty tricks.", + "length": 72 + }, + { + "text": "It’s necessary to say once and for all who did something evil, full stop.", + "length": 75 + }, + { + "text": "' 'This is one of Lech Walesa’s exotic ideas,' said ex-premier Jozef Oleksy.", + "length": 78 + }, + { + "text": "'Since Russia is a huge country, a rich country, it has the whole Mendeleev table.", + "length": 82 + }, + { + 
"text": "'If a state will have a good shape, it will be possible to build various kinds of structures.", + "length": 93 + }, + { + "text": "' Walesa - who called himself 'a leader by nature' - appeared to rule out a return to frontline politics.", + "length": 105 + }, + { + "text": "'There is a broadening of geographical structures, while the economy and democracy should also be altered.", + "length": 106 + }, + { + "text": "'The Germans have done us a lot more evil, and the relationship we have  now is much better than that with Russia.", + "length": 115 + }, + { + "text": "Former Polish President Lech Walesa has demanded an eroding of borders between major nation states on the continent .", + "length": 118 + }, + { + "text": "'He has the role of someone who stimulates ideas, sometimes annoying ideas, but I don’t attach much importance to this one.", + "length": 125 + }, + { + "text": "Everyone would want to work with it, but it has to have the right shapes so that it will be possible to build something with it.", + "length": 128 + }, + { + "text": "' However, he seemed to question Russia’s ability to join this new club despite recently urging closer relations with this former enemy.", + "length": 138 + }, + { + "text": "'We have gone so far in our technical advancements that we are no longer located in our own countries,' he told Russian news agency Itar-Tass.", + "length": 142 + }, + { + "text": "The former Solidarity trade union leader shocked his follow Poles by demanding an eroding of borders between major nation states on the continent.", + "length": 146 + }, + { + "text": "' The ex-Polish president who won the Nobel Peace Prize said:  'My struggle led to the reunification of Germany and the creation of the state of Europe.", + "length": 153 + }, + { + "text": "Claiming the old borders no longer mattered, he said: 'This is the broadening of geographical structures, while the economy and democracy should also be altered.", + "length": 161 + }, + { + "text": 
"He insisted that the bitter wartime enemies should unite, despite the atrocities inflicted in World War II as 'globalisation is on the horizon' 'The frontiers will not be visible.", + "length": 179 + }, + { + "text": "' The outspoken former politician said Germany 'fully confessed to all its dirty tricks' after the war - including its concentration camps in Auschwitz - so the pair could be united .", + "length": 183 + }, + { + "text": "An extraordinary call for Germany and Poland to unite as a single country at the new heart of Europe came yesterday from Lech Walesa, one of the key figures in the demise of the Cold War.", + "length": 187 + }, + { + "text": "' He insisted that the uniting of his Polish homeland which its bitter wartime enemy Germany was not only possible but the logical conclusion of his own historic role in dismantling the Iron Curtain.", + "length": 199 + }, + { + "text": "'Right now we are stopped by obstacles of different kinds, like various kinds of disparities, different systems, but gradually we’ll be smoothing it down and the states will become like Lego bricks.", + "length": 200 + }, + { + "text": "' Having faced down the might of one old empire - the USSR - he appeared ready in his rambling remarks to trust a new Euroland and, further still, embraced an almost utopian vision of a world without frontiers.", + "length": 210 + }, + { + "text": "We need to expand economic and defence cooperation and other structures to make from the states of Poland and Germany one state of Europe,' said the mustachioed Walesa, 70 this month, who was a staunch opponent of Soviet hegemony in Eastern Europe whose historic industrial action in the Gdansk shipyards ushered in a new era freedom and democracy.", + "length": 348 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7565143704414368 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:00.736025845Z", + "first_section_created": 
"2025-12-23T09:36:00.73640606Z", + "last_section_published": "2025-12-23T09:36:00.736639269Z", + "all_results_received": "2025-12-23T09:36:00.795895813Z", + "output_generated": "2025-12-23T09:36:00.79606082Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:00.73640606Z", + "publish_time": "2025-12-23T09:36:00.736639269Z", + "first_worker_start": "2025-12-23T09:36:00.737117288Z", + "last_worker_end": "2025-12-23T09:36:00.795013Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:00.737098487Z", + "start_time": "2025-12-23T09:36:00.73716859Z", + "end_time": "2025-12-23T09:36:00.737241993Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:00.737353Z", + "start_time": "2025-12-23T09:36:00.737509Z", + "end_time": "2025-12-23T09:36:00.795013Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:00.737109388Z", + "start_time": "2025-12-23T09:36:00.73718159Z", + "end_time": "2025-12-23T09:36:00.737271594Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:00.737046585Z", + "start_time": "2025-12-23T09:36:00.737117288Z", + "end_time": "2025-12-23T09:36:00.737146289Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": 
"sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4095, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/006a59c662a82db6c90b550b81abc7d7490311aa.json b/data/output/006a59c662a82db6c90b550b81abc7d7490311aa.json new file mode 100644 index 0000000..bad3c72 --- /dev/null +++ b/data/output/006a59c662a82db6c90b550b81abc7d7490311aa.json @@ -0,0 +1,580 @@ +{ + "file_name": "006a59c662a82db6c90b550b81abc7d7490311aa.txt", + "total_words": 1296, + "top_n_words": [ + { + "word": "the", + "count": 68 + }, + { + "word": "and", + "count": 35 + }, + { + "word": "to", + "count": 35 + }, + { + "word": "said", + "count": 32 + }, + { + "word": "in", + "count": 30 + }, + { + "word": "s", + "count": 23 + }, + { + "word": "a", + "count": 21 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "are", + "count": 16 + }, + { + "word": "algeria", + "count": 15 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "'Mr.", + "length": 4 + }, + { + 
"text": "Japan .", + "length": 7 + }, + { + "text": "Norway .", + "length": 8 + }, + { + "text": "France .", + "length": 8 + }, + { + "text": "Colombia .", + "length": 10 + }, + { + "text": "Malaysia .", + "length": 10 + }, + { + "text": "\" Romania .", + "length": 11 + }, + { + "text": "official said.", + "length": 14 + }, + { + "text": "United States .", + "length": 15 + }, + { + "text": "United Kingdom .", + "length": 16 + }, + { + "text": "military at a U.", + "length": 16 + }, + { + "text": "Raids turn deadly .", + "length": 19 + }, + { + "text": "official said Sunday.", + "length": 21 + }, + { + "text": "Several hostages died.", + "length": 22 + }, + { + "text": "State Department official.", + "length": 26 + }, + { + "text": "endangering hostages' lives.", + "length": 28 + }, + { + "text": "House Intelligence Committee.", + "length": 29 + }, + { + "text": "Four other Romanians were freed.", + "length": 32 + }, + { + "text": "offers to help during the crisis.", + "length": 33 + }, + { + "text": "naval base in Sigonella, Italy, a U.", + "length": 36 + }, + { + "text": "Three others who had been held are safe.", + "length": 40 + }, + { + "text": "Countries mourn dead, try to track down missing .", + "length": 49 + }, + { + "text": "Six freed Americans left Algeria and one remained.", + "length": 50 + }, + { + "text": "\"A new day without answers has increased our concern.", + "length": 53 + }, + { + "text": "Opinion: Algeria crisis is a wakeup call for America .", + "length": 54 + }, + { + "text": "Colombia's president said a citizen was presumed dead.", + "length": 54 + }, + { + "text": "Read more: Algeria attack may have link to Libya camps .", + "length": 56 + }, + { + "text": "Japan has 10 citizens -- likely affiliated with JGC Corp.", + "length": 57 + }, + { + "text": "Mike Rogers, a Michigan Republican who is chairman of the U.", + "length": 60 + }, + { + "text": "Read more: Mali takes key town as nations ready more troops .", + "length": 61 + }, + { + 
"text": "Marlboro': The veteran jihadist behind the attack in Algeria .", + "length": 62 + }, + { + "text": "\"They decided they were going to handle it their way,\" said Rep.", + "length": 64 + }, + { + "text": "Undeterred, the government followed with a second push Saturday.", + "length": 64 + }, + { + "text": "Three British citizens were killed, the Foreign Office said Sunday.", + "length": 67 + }, + { + "text": "Britain's BP said Sunday four of its workers remain unaccounted for.", + "length": 68 + }, + { + "text": "One Romanian lost his life, the country's Foreign Ministry said Saturday.", + "length": 73 + }, + { + "text": "Twenty-two other Britons who were taken hostage have safely returned home.", + "length": 74 + }, + { + "text": "\" On Sunday, an American lawmaker said the Algerian government turned down U.", + "length": 77 + }, + { + "text": "\"We know that there are many fatalities,\" Statoil CEO Helge Lund said Sunday.", + "length": 77 + }, + { + "text": "Thursday's military incursion succeeded in freeing some hostages -- but not all.", + "length": 80 + }, + { + "text": "(CNN) -- The hostage crisis in eastern Algeria is over, but the questions remain.", + "length": 81 + }, + { + "text": "Three other British nationals and a UK resident are also \"believed dead,\" he said.", + "length": 82 + }, + { + "text": "\"They did not want us or the other hostage nations involved in the decision-making.", + "length": 83 + }, + { + "text": "The remains of one American hostage were also brought to the base, the official said.", + "length": 85 + }, + { + "text": "That assault killed the remaining hostage-takers but resulted in more hostage deaths.", + "length": 85 + }, + { + "text": "No known French hostages are unaccounted for, France's Defense Ministry said Saturday.", + "length": 86 + }, + { + "text": "\"Search efforts are ongoing at the gas installation, looking for more possible victims.", + "length": 87 + }, + { + "text": "But regional analysts believe it was too 
sophisticated to have been planned in just days.", + "length": 89 + }, + { + "text": "Some 685 Algerian workers and 107 foreigners were freed, the Algerian Interior Ministry said.", + "length": 93 + }, + { + "text": "Five Norwegians are missing, while eight are safe, Norwegian Prime Minister Jens Stoltenberg said.", + "length": 98 + }, + { + "text": "Such Islamist militant activity is not new to Africa, including recent violence in Mali and Somalia.", + "length": 100 + }, + { + "text": "But the news agency did not post the video, and CNN has not independently confirmed its authenticity.", + "length": 101 + }, + { + "text": "The kidnappers wielded AK-47 rifles and put explosive-laden vests on some hostages, according to a U.", + "length": 101 + }, + { + "text": "In it, Belmoktar said, \"We at al Qaeda are claiming responsibility of this blessed guerrilla operation.", + "length": 103 + }, + { + "text": "At least one American, identified as Frederick Buttaccio, is among the dead, the State Department said.", + "length": 103 + }, + { + "text": "Three hostages were on their way back to Malaysia, the country's state-run news agency reported Sunday.", + "length": 103 + }, + { + "text": "There are still 10 Japanese who have yet to be confirmed safe, JGC -- the engineering firm -- said Sunday.", + "length": 106 + }, + { + "text": "CNN's Paul Cruickshank, Slma Shelbayah, Kevin Bohn, Barbara Starr and Per Nyberg contributed to this report.", + "length": 108 + }, + { + "text": "But there is a \"worrying possibility\" that another is dead, while a fifth is unaccounted for, the agency said.", + "length": 110 + }, + { + "text": "The Islamic extremists also planned to blow up the gas installation and rigged it with mines throughout, the U.", + "length": 111 + }, + { + "text": "On Thursday, Algerian special forces moved in because the government said the militants wanted to flee to Mali.", + "length": 111 + }, + { + "text": "Yet that interest is coupled with pressure to make sure 
foreign nationals, and their business ventures, are safe.", + "length": 113 + }, + { + "text": "While the military part of the operation is over, the searching and mourning is not for people in countries worldwide.", + "length": 118 + }, + { + "text": "And Norway's Statoil said five of its employees were missing, while 12 others are now home in Norway, Algeria and Canada.", + "length": 121 + }, + { + "text": "\" Belmoktar has communicated with this and other news sites before, said Andrew Lebovich, a Senegal-based security analyst.", + "length": 123 + }, + { + "text": "\"It was clear that the terrorists were determined to escape the country with the captives and to bomb the gas installations.", + "length": 124 + }, + { + "text": "Eleven former hostages -- among them British citizens -- have gotten medical treatment and psychological counseling from the U.", + "length": 127 + }, + { + "text": "Algeria's military found numerous \"foreign military uniforms\" in its sweep of the In Amenas facility, its Interior Ministry said.", + "length": 129 + }, + { + "text": "The attackers came from six countries -- only three were Algerian -- and included Arabs and Africans, Said told state-run Radio Algeria.", + "length": 136 + }, + { + "text": ", an engineering firm that was involved in gas production in In Amenas -- who are yet to be confirmed safe, in addition to a number of dead.", + "length": 140 + }, + { + "text": "\" British Defense Secretary Philip Hammond called the loss of life \"appalling and unacceptable,\" while laying blame solely on the terrorists.", + "length": 141 + }, + { + "text": "\"We are going to strengthen security, and we rely first on our means and resources,\" Yousfi said, according to the official Algerian Press Service.", + "length": 147 + }, + { + "text": "The army intervened \"to avoid a bloody turning point of events in this extremely dangerous situation,\" the Algerian Interior Ministry said Saturday.", + "length": 148 + }, + { + "text": "The 
hostages were brought from Algeria to the base Friday, the official said, and are being flown to their home countries as their conditions warrant.", + "length": 150 + }, + { + "text": "Algeria said the attack was in retaliation for allowing France to use Algerian airspace for an offensive against Islamist militants in neighboring Mali.", + "length": 152 + }, + { + "text": "In a statement Saturday night, the White House said it was in close contact with Algeria's government to \"gain a fuller understanding of what took place.", + "length": 153 + }, + { + "text": "\" British Foreign Secretary William Hague echoed those remarks, adding his government is \"working hard to get definitive information\" about each individual.", + "length": 156 + }, + { + "text": "Algeria's status as Africa's largest natural gas producer and a major supplier of the product to Europe heightens its importance to those who want to invest there.", + "length": 163 + }, + { + "text": "Youcef Yousfi, Algeria's energy and mining minister, insisted Sunday his country can keep its gas facilities secure and ruled out foreign forces coming in to help.", + "length": 163 + }, + { + "text": "I fear the numbers will be updated with more victims later today when the search operation is expected to end,\" said Mohammed Said, Algeria's communication minister.", + "length": 165 + }, + { + "text": "And Sahara News' report Sunday claimed Belmoktar said \"40 immigrant Jihadists and supporters of Muslim countries\" led the siege in retaliation for the Mali offensive.", + "length": 166 + }, + { + "text": "The In Amenas plant is run by Algeria's state oil company, in cooperation with foreign firms such as Statoil and BP, and because of that employed workers from several countries.", + "length": 177 + }, + { + "text": "One man -- identified as Yann Desjeux -- died after telling the French newspaper Sud Ouest on Thursday that he and 34 other hostages of nine different nationalities were treated well.", + "length": 
183 + }, + { + "text": "Among them, exactly how many people are unaccounted for at a remote natural gas facility after three days of chaos that ended Saturday, leaving at least 23 hostages and dozens of Islamist militants dead.", + "length": 203 + }, + { + "text": "Some survivors described their harrowing escapes by rigging up disguises and sneaking to safety with locals, with at least one survivor running for his life with plastic explosives strapped around his neck.", + "length": 206 + }, + { + "text": "Militants in pickup trucks struck the sprawling gas complex about 50 kilometers (30 miles) west of the Libyan border at dawn Wednesday, gathered the Westerners who worked there into a group and tied them up.", + "length": 207 + }, + { + "text": "Mauritania's Sahara Media news agency said Sunday it had a video from Moktar Belmoktar, who leads the Al-Mulathameen Brigade associated with al Qaeda in the Islamic Maghreb that regional media have reported was behind the attack.", + "length": 229 + }, + { + "text": "In addition to combing the sprawling desert site, Algerian forces are searching hospitals and medical centers around the country, as well as towns and villages near the targeted site, according to a statement Sunday from Statoil.", + "length": 229 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8782010972499847 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:01.236990264Z", + "first_section_created": "2025-12-23T09:36:01.23739128Z", + "last_section_published": "2025-12-23T09:36:01.237803797Z", + "all_results_received": "2025-12-23T09:36:01.32989924Z", + "output_generated": "2025-12-23T09:36:01.330162951Z", + "total_processing_time_ms": 93, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 92, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:01.23739128Z", + "publish_time": "2025-12-23T09:36:01.23763619Z", + 
"first_worker_start": "2025-12-23T09:36:01.238196812Z", + "last_worker_end": "2025-12-23T09:36:01.31248Z", + "total_journey_time_ms": 75, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:01.238198712Z", + "start_time": "2025-12-23T09:36:01.238256514Z", + "end_time": "2025-12-23T09:36:01.238334217Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:01.238306Z", + "start_time": "2025-12-23T09:36:01.238524Z", + "end_time": "2025-12-23T09:36:01.31248Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 73 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:01.23815611Z", + "start_time": "2025-12-23T09:36:01.238239314Z", + "end_time": "2025-12-23T09:36:01.238363119Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:01.238124009Z", + "start_time": "2025-12-23T09:36:01.238196812Z", + "end_time": "2025-12-23T09:36:01.238250614Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:01.237672291Z", + "publish_time": "2025-12-23T09:36:01.237803797Z", + "first_worker_start": "2025-12-23T09:36:01.238254714Z", + "last_worker_end": "2025-12-23T09:36:01.328885Z", + "total_journey_time_ms": 91, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:01.238198712Z", + "start_time": "2025-12-23T09:36:01.238254714Z", + "end_time": "2025-12-23T09:36:01.238329717Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:01.238543Z", + "start_time": "2025-12-23T09:36:01.238682Z", + "end_time": "2025-12-23T09:36:01.328885Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 90 + }, + "topn": { + 
"worker_type": "topn", + "receive_time": "2025-12-23T09:36:01.238189712Z", + "start_time": "2025-12-23T09:36:01.238301816Z", + "end_time": "2025-12-23T09:36:01.23839832Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:01.238262015Z", + "start_time": "2025-12-23T09:36:01.238316417Z", + "end_time": "2025-12-23T09:36:01.238343018Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 163, + "min_processing_ms": 73, + "max_processing_ms": 90, + "avg_processing_ms": 81, + "median_processing_ms": 90, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4039, + "slowest_section_id": 1, + "slowest_section_time_ms": 91 + } +} diff --git a/data/output/006ad570b6c761fd7bd4c5e5c30a348643733664.json b/data/output/006ad570b6c761fd7bd4c5e5c30a348643733664.json new file mode 100644 index 0000000..f4ab7aa --- /dev/null +++ b/data/output/006ad570b6c761fd7bd4c5e5c30a348643733664.json @@ -0,0 +1,318 @@ +{ + "file_name": 
"006ad570b6c761fd7bd4c5e5c30a348643733664.txt", + "total_words": 789, + "top_n_words": [ + { + "word": "the", + "count": 36 + }, + { + "word": "a", + "count": 26 + }, + { + "word": "to", + "count": 26 + }, + { + "word": "she", + "count": 22 + }, + { + "word": "lakhan", + "count": 19 + }, + { + "word": "of", + "count": 19 + }, + { + "word": "he", + "count": 15 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "his", + "count": 12 + }, + { + "word": "and", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "'He .", + "length": 5 + }, + { + "text": "was never the same again.", + "length": 25 + }, + { + "text": "However, the bill is yet to be passed.", + "length": 38 + }, + { + "text": "'Residential homes are very, very few.", + "length": 38 + }, + { + "text": "'One night, he shook violently,' she said.", + "length": 42 + }, + { + "text": "It sparked concern among charities and police .", + "length": 47 + }, + { + "text": "Above, Lakhan is pictured tied up to the bus stop .", + "length": 51 + }, + { + "text": "He can't talk, so how will he tell anyone if he gets lost?", + "length": 58 + }, + { + "text": "'He is deaf so he would not be able to hear traffic coming.", + "length": 59 + }, + { + "text": "official identity card that would allow her to visit the centre.", + "length": 64 + }, + { + "text": "At the time, Sakubai defended her actions, saying: 'What else can I do?", + "length": 71 + }, + { + "text": "He is now being looked after at a state-run institution in south Mumbai .", + "length": 73 + }, + { + "text": "The pensioner spent years living on the dirty stretch of pavement with her grandson .", + "length": 85 + }, + { + "text": "The nine-year-old cannot speak or hear and suffers from cerebral palsy and epilepsy .", + "length": 85 + }, + { + "text": "Now, Lakhan's grandmother has revealed the heartbreaking truth behind the youngster's plight.", + "length": 93 + }, + { + "text": "Sakubai added that she was hopeful she would get to see Lakahan 
regularly once she acquired an .", + "length": 96 + }, + { + "text": "At night, she said she would tie Lakhan to her own leg so she would know if he tried to walk away.", + "length": 98 + }, + { + "text": "Following its release last month, the picture of nine-year-old Lakhan Kale captured hearts across the world.", + "length": 108 + }, + { + "text": "' Family: The pensioner said she had 'no choice' but to tie up Lakhan while she was away due to his disabilities .", + "length": 114 + }, + { + "text": "Dressed in blue, the disabled boy lies listlessly on the pavement in Mumbai, his ankle tethered with rope to a bus stop.", + "length": 120 + }, + { + "text": "She said she wants to find Lakhan more of a 'home', but that there is a serious shortage of suitable facilities in Mumbai.", + "length": 122 + }, + { + "text": "As the youngster was led away, his grandmother bid him a tearful goodbye, but said she was 'happy' he was going to be looked after.", + "length": 131 + }, + { + "text": "Carer: Lakhan's grandmother Sakubai Kale (pictured with Lakhan) has revealed the heartbreaking truth behind the youngster's plight.", + "length": 131 + }, + { + "text": "Heartbreaking: Lakhan Kale is pictured lying listlessly on the pavement in Mumbai, his ankle tethered with rope to a pole of a bus stop.", + "length": 136 + }, + { + "text": "Speaking through tears, Sakubai told of how her grandson's disabilities had emerged after he developed a fever at just a few months old.", + "length": 136 + }, + { + "text": "Disabled: Lakhan, who was tied up using a 'long' cloth rope, is pictured strolling along the dirty stretch of pavement where he was raised.", + "length": 139 + }, + { + "text": "She said she had finally made the decision to tie up Lakhan for his own safety, using a 'long' cloth rope which would not damage his ankle.", + "length": 139 + }, + { + "text": "If he ran on to the road, he would get killed,' said the pensioner, who still lives on the pavement, surviving off food she 
buys from a vendor.", + "length": 143 + }, + { + "text": "Although thousands of pedestrians strolled past Lakhan unwittingly, a photograph of him tied up eventually appeared in a local newspaper last month.", + "length": 148 + }, + { + "text": "There's a major need for the government to do something, a social responsibility to provide residential centres for children like Lakhan,’ she said.", + "length": 150 + }, + { + "text": "Speaking to CNN, she explained how she became Lakhan's sole carer after his father died four years ago, his mother left and his older sister ran away.", + "length": 150 + }, + { + "text": "Following the picture's release, social worker Meena Mutha placed Lakhan in a state-run institution in south Mumbai at the request of a local police constable.", + "length": 159 + }, + { + "text": "Left alone: Speaking to CNN, Subakai explained how she became Lakhan's sole carer after his father died four years ago, his mother left and his older sister ran away.", + "length": 166 + }, + { + "text": "At these times, Sakubai said she had had 'no choice' but to tie Lakhan to the red-coloured poles of bus stop 58, which is shaded by the hanging roots of a banyan tree.", + "length": 167 + }, + { + "text": "Although the institution Lakhan is currently residing at takes in a range of needy children, Ms Mutha said it failed to cater specifically for those with special needs.", + "length": 168 + }, + { + "text": "' Sparking concern: Although thousands of pedestrians strolled past Lakhan unwittingly, a photograph of him tied up eventually appeared in a local newspaper last month.", + "length": 168 + }, + { + "text": "She said she had tried to earn a small living for the pair by selling toys, flower garlands and trinkets on Chowpatty, a renowned beachfront in the heart of the Indian city.", + "length": 173 + }, + { + "text": "Sakubai Kale, who is in her 70s, spent years living in poverty on the dirty stretch of pavement with her grandson, who cannot hear or speak and 
suffers from cerebral palsy and epilepsy.", + "length": 185 + }, + { + "text": "It sparked concern among charities and the police - with activists claiming he was just one of many disabled people in India facing daily stigma and a lack of facilities to assist them.", + "length": 185 + }, + { + "text": "In February, a long-awaited bill was introduced into the Indian parliament aiming to give disabled people equal rights, including access to education, employment and legal redress against discrimination.", + "length": 203 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6214789152145386 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:01.738569708Z", + "first_section_created": "2025-12-23T09:36:01.74012637Z", + "last_section_published": "2025-12-23T09:36:01.740311377Z", + "all_results_received": "2025-12-23T09:36:01.811718902Z", + "output_generated": "2025-12-23T09:36:01.81192211Z", + "total_processing_time_ms": 73, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 71, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:01.74012637Z", + "publish_time": "2025-12-23T09:36:01.740311377Z", + "first_worker_start": "2025-12-23T09:36:01.740913301Z", + "last_worker_end": "2025-12-23T09:36:01.810777Z", + "total_journey_time_ms": 70, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:01.740875499Z", + "start_time": "2025-12-23T09:36:01.740962703Z", + "end_time": "2025-12-23T09:36:01.741031506Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:01.741185Z", + "start_time": "2025-12-23T09:36:01.741347Z", + "end_time": "2025-12-23T09:36:01.810777Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 69 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:01.740840598Z", 
+ "start_time": "2025-12-23T09:36:01.740913301Z", + "end_time": "2025-12-23T09:36:01.741017305Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:01.740917001Z", + "start_time": "2025-12-23T09:36:01.741009305Z", + "end_time": "2025-12-23T09:36:01.741054207Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 69, + "min_processing_ms": 69, + "max_processing_ms": 69, + "avg_processing_ms": 69, + "median_processing_ms": 69, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4506, + "slowest_section_id": 0, + "slowest_section_time_ms": 70 + } +} diff --git a/data/output/006b4652c00c54dd3469a22bbf64a84e2f77ca81.json b/data/output/006b4652c00c54dd3469a22bbf64a84e2f77ca81.json new file mode 100644 index 0000000..a83aa6e --- /dev/null +++ b/data/output/006b4652c00c54dd3469a22bbf64a84e2f77ca81.json @@ -0,0 +1,246 @@ +{ + "file_name": "006b4652c00c54dd3469a22bbf64a84e2f77ca81.txt", + "total_words": 278, + "top_n_words": [ 
+ { + "word": "the", + "count": 20 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "was", + "count": 8 + }, + { + "word": "that", + "count": 6 + }, + { + "word": "her", + "count": 5 + }, + { + "word": "with", + "count": 5 + }, + { + "word": "an", + "count": 4 + }, + { + "word": "and", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "’ Write caption here .", + "length": 24 + }, + { + "text": "Call it an unsolved mystery.", + "length": 28 + }, + { + "text": "19:31 EST, 17 December 2012 .", + "length": 29 + }, + { + "text": "02:46 EST, 18 December 2012 .", + "length": 29 + }, + { + "text": "The dispute was never legally resolved.", + "length": 39 + }, + { + "text": "’ But the details are still up in the air.", + "length": 44 + }, + { + "text": "'This was not consistent with tea but is believed to be water.", + "length": 62 + }, + { + "text": "‘This was not consistent with tea but is believed to be water.", + "length": 64 + }, + { + "text": "Shannon said that she was hit in the back of her head with a glass of tea.", + "length": 74 + }, + { + "text": "Shannon claimed she reached for the can of air-freshener and sprayed it in the ‘area.", + "length": 87 + }, + { + "text": "In response to the spraying, her husband allegedly threw water on Shannon and then tossed a plastic water bottle at her.", + "length": 120 + }, + { + "text": "After consulting with a local judge, the police officer determined that there was a lack of probable cause to issue an arrest warrant.", + "length": 134 + }, + { + "text": "‘Upon my original arrival to the residence there was a clear liquid on the back door and floor,’ the deputy wrote in the police statement.", + 
"length": 142 + }, + { + "text": "’ Michael told the deputy tasked with sorting out the story that an ‘upset stomach’ prompted him to pass the gas that infuriated his spouse.", + "length": 146 + }, + { + "text": "Not tea: 'Upon my original arrival to the residence there was a clear liquid on the back door and floor,' the deputy wrote in the police statement.", + "length": 147 + }, + { + "text": "An angered wife exacerbated a pungent domestic dispute after allegedly spraying vanilla-scented Lysol on her husband who passed gas that ‘was bad enough to cause her to almost puke.", + "length": 183 + }, + { + "text": "Michael Manatis, 46, told a local deputy in Spartanburg County, South Carolina, that his wife, Shannon Manatis, 41, had ‘sprayed the Lysol in his eyes,’ according to a police report.", + "length": 186 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.669701874256134 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:02.241332899Z", + "first_section_created": "2025-12-23T09:36:02.241661412Z", + "last_section_published": "2025-12-23T09:36:02.24187482Z", + "all_results_received": "2025-12-23T09:36:02.312156301Z", + "output_generated": "2025-12-23T09:36:02.312283206Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 70, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:02.241661412Z", + "publish_time": "2025-12-23T09:36:02.24187482Z", + "first_worker_start": "2025-12-23T09:36:02.242303337Z", + "last_worker_end": "2025-12-23T09:36:02.311207Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:02.242350639Z", + "start_time": "2025-12-23T09:36:02.242412242Z", + "end_time": "2025-12-23T09:36:02.242445243Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": 
"sentiment", + "receive_time": "2025-12-23T09:36:02.242571Z", + "start_time": "2025-12-23T09:36:02.242724Z", + "end_time": "2025-12-23T09:36:02.311207Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 68 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:02.242286937Z", + "start_time": "2025-12-23T09:36:02.242344539Z", + "end_time": "2025-12-23T09:36:02.242392441Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:02.242247335Z", + "start_time": "2025-12-23T09:36:02.242303337Z", + "end_time": "2025-12-23T09:36:02.242320838Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 68, + "min_processing_ms": 68, + "max_processing_ms": 68, + "avg_processing_ms": 68, + "median_processing_ms": 68, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1607, + "slowest_section_id": 0, + "slowest_section_time_ms": 69 + } +} diff --git a/data/output/006b8c1960fd2ba553cb60e8bf2931159c832c88.json 
b/data/output/006b8c1960fd2ba553cb60e8bf2931159c832c88.json new file mode 100644 index 0000000..22aa6ce --- /dev/null +++ b/data/output/006b8c1960fd2ba553cb60e8bf2931159c832c88.json @@ -0,0 +1,612 @@ +{ + "file_name": "006b8c1960fd2ba553cb60e8bf2931159c832c88.txt", + "total_words": 1015, + "top_n_words": [ + { + "word": "the", + "count": 69 + }, + { + "word": "in", + "count": 38 + }, + { + "word": "and", + "count": 29 + }, + { + "word": "a", + "count": 26 + }, + { + "word": "was", + "count": 20 + }, + { + "word": "said", + "count": 16 + }, + { + "word": "felt", + "count": 15 + }, + { + "word": "it", + "count": 15 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "to", + "count": 14 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "A .", + "length": 3 + }, + { + "text": "He .", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "'I .", + "length": 4 + }, + { + "text": "off.", + "length": 4 + }, + { + "text": "A 6.", + "length": 4 + }, + { + "text": "He .", + "length": 4 + }, + { + "text": "25pm.", + "length": 5 + }, + { + "text": "'One .", + "length": 6 + }, + { + "text": "' Phil .", + "length": 8 + }, + { + "text": "Tremors .", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "1 on the .", + "length": 10 + }, + { + "text": "21pm today.", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "the building.", + "length": 13 + }, + { + "text": "the building.", + "length": 13 + }, + { + "text": "Lucy Crossley .", + "length": 15 + }, + { + "text": "22GMT) in Exeter.", + "length": 17 + }, + { + "text": "Lasted 3 seconds.", + "length": 17 + }, + { + "text": "#Devon #earthquake.", + "length": 19 + }, + { + "text": "#BurryPort #Llanelli.", + "length": 21 + }, + { + "text": "Slightly felt outside.", + "length": 22 + }, + { + "text": "1 on the Richter scale.", + "length": 23 + }, + { + "text": "1 on the Richter scale .", + "length": 24 + }, + { + 
"text": "I was sat having a cuppa in .", + "length": 29 + }, + { + "text": "12:03 EST, 20 February 2014 .", + "length": 29 + }, + { + "text": "09:38 EST, 20 February 2014 .", + "length": 29 + }, + { + "text": "#devon #barnstaple #earthquake.", + "length": 31 + }, + { + "text": "Death toll usually over 50,000.", + "length": 31 + }, + { + "text": "The definition of a quake of 4.", + "length": 31 + }, + { + "text": "had ploughed into their building.", + "length": 33 + }, + { + "text": "Sally Norden, another Barnstaple .", + "length": 34 + }, + { + "text": "really heavy truck going up the road\".", + "length": 38 + }, + { + "text": "We felt it in the building in Swansea.", + "length": 38 + }, + { + "text": "my front room and felt the earth move.", + "length": 38 + }, + { + "text": "Permanent changes in ground topography.", + "length": 39 + }, + { + "text": "10,000 to 15,000 per year in the world.", + "length": 39 + }, + { + "text": "Generally causes none to minimal damage.", + "length": 40 + }, + { + "text": "There were no reports of casualties or .", + "length": 40 + }, + { + "text": "Felt by most people in the affected area.", + "length": 41 + }, + { + "text": "' The magnitude of the tremor mean it is .", + "length": 42 + }, + { + "text": "resident, said: 'I thought I was going mad.", + "length": 43 + }, + { + "text": "smashed lamps and books falling off shelves.", + "length": 44 + }, + { + "text": "Moderate to significant damage very unlikely.", + "length": 45 + }, + { + "text": "20pm today, to a 'heavy truck going up the road'.", + "length": 49 + }, + { + "text": "It shook and wobbled and no one knew what it was.", + "length": 49 + }, + { + "text": "Some objects may fall off shelves or be knocked over.", + "length": 53 + }, + { + "text": "Richter scale struck near Market Rasen, Lincolnshire.", + "length": 53 + }, + { + "text": "Heavy damage and shaking extends to distant locations.", + "length": 54 + }, + { + "text": "Things were shaking on the shelves and he ran 
outside.", + "length": 54 + }, + { + "text": "' Bay FM radio in Exmouth said the radio studios shook.", + "length": 55 + }, + { + "text": "' Another user said: 'Earthquake Tremor in Wales today.", + "length": 55 + }, + { + "text": "It was quite scary and I was left feeling disorientated.", + "length": 56 + }, + { + "text": "reports from residents throughout Devon and South Wales.", + "length": 56 + }, + { + "text": "' Ant Veal tweeted: 'Noticeable earth tremor just now (13.", + "length": 58 + }, + { + "text": "7 which hit Dudley in the West Midlands in September 2002.", + "length": 58 + }, + { + "text": "The shock was also measured in Wales, Cornwall and Jersey.", + "length": 58 + }, + { + "text": "Jobcentre building at the same time as reported by others.", + "length": 58 + }, + { + "text": "' Other significant earthquakes to rock the UK include a 4.", + "length": 59 + }, + { + "text": "3 magnitude quake in Folkestone Kent in April 2007, and a 4.", + "length": 60 + }, + { + "text": "1 magnitude quake was recorded in the North Sea in June 1931.", + "length": 61 + }, + { + "text": "the biggest felt in the UK since 2008 when a quake measuring 5.", + "length": 63 + }, + { + "text": "' People in South Wales also reported feeling the ground shake.", + "length": 63 + }, + { + "text": "'The whole building shook and people thought something had hit .", + "length": 64 + }, + { + "text": "Llanelli, Barnstable, Gloucester and South Molton, the BGS said.", + "length": 64 + }, + { + "text": "'There was some proper swaying on the shelves and filing cabinets.", + "length": 66 + }, + { + "text": "were felt in areas including Dartmoor, Bristol, Taunton, Swansea, .", + "length": 67 + }, + { + "text": "Aageson, who works at Springfield Court, a facility for people with .", + "length": 69 + }, + { + "text": "looked outside and the birds all seemed to be very noisy and flying .", + "length": 69 + }, + { + "text": "said: 'The whole building shook and people thought something had hit .", 
+ "length": 70 + }, + { + "text": "seconds and it was very frightening\" and another said \"it felt like a .", + "length": 71 + }, + { + "text": "describing \"the house was rocking\", another described \"lasted several .", + "length": 71 + }, + { + "text": "said: 'We felt it pretty strongly in the upper floors of the Barnstaple .", + "length": 73 + }, + { + "text": "damage to buildings but people did tweet details of broken flower pots, .", + "length": 73 + }, + { + "text": "spokesman said: \"At this time the BGS have received several macroseismic .", + "length": 74 + }, + { + "text": "mental health issues in Barnstaple, said anxious staff thought a vehicle .", + "length": 74 + }, + { + "text": "It shook and wobbled and no one knew what it was'Phil Aageson, Barnstaple .", + "length": 75 + }, + { + "text": "Twitter user Robin Beer said: 'Anybody else feel the earth tremor just now?!", + "length": 76 + }, + { + "text": "One resident in Birdgwater, Somerset, claimed she felt her work building 'swaying'.", + "length": 83 + }, + { + "text": "But others likened the tremor, which originated in the Bristol Channel at around 1.", + "length": 83 + }, + { + "text": "' Shaken Brendan Garnett-Frizell felt the quake from inside an office in Barnstaple.", + "length": 84 + }, + { + "text": "1 on the richter scale is: 'Noticeable shaking of indoor objects and rattling noises.", + "length": 85 + }, + { + "text": "Someone using the name @BuskingWomble wrote: 'Blimey did anyone else feel that tremor?", + "length": 86 + }, + { + "text": "'It is an area in the Bristol Channel we know to have seismological action,' she said.", + "length": 86 + }, + { + "text": "Homeowners reported their houses rocking and others have described the quake as 'frightening'.", + "length": 94 + }, + { + "text": "The biggest earthquake in six years has hit the South West of the UK with the tremor measuring 4.", + "length": 97 + }, + { + "text": "Tremor: Real-time Seismograph data from a survey station in 
Swindon showing a large seismic activity at 1.", + "length": 106 + }, + { + "text": "'One guy called and said he was in his house when he felt the earthquake - he said it was a bit of a surprise.", + "length": 110 + }, + { + "text": "' Glenn Watts added: 'Small tremor felt in Exeter, lasted 5 seconds armchairs wobbled as did the tv on its stand.", + "length": 113 + }, + { + "text": "Susanne Sargeant, from the British Geological Survey, said it was 'not unusual' to get quakes in the Bristol Channel.", + "length": 117 + }, + { + "text": "' Twitter user Paul Clews posted a picture of a broken lamp, saying that the damage occurred as a result of the tremor.", + "length": 119 + }, + { + "text": "However, a factor nine Earthquake is defined by: Near or at total destruction - severe damage or collapse to all buildings.", + "length": 123 + }, + { + "text": "Shaken: Twitter user Paul Clews posted a picture of a broken lamp, saying that the damage occurred as a result of the tremor .", + "length": 126 + }, + { + "text": "Today many people took to social networks including Twitter, reporting that they had felt the ground move for around 10 seconds.", + "length": 128 + }, + { + "text": "'We do see earthquakes here from time to time and the last one was at Hartland Point in 2001 - that's 50km south west of today's earthquake.", + "length": 140 + }, + { + "text": "Some coastal towns in south-west Britain were shaken by the quake, which the British Geological Survey (BGS) said had originated in the Bristol Channel.", + "length": 152 + }, + { + "text": "Quake: Experts say the biggest earthquake in six years has hit the South West of the UK with the tremor, originating in the Bristol Channel, measuring 4.", + "length": 153 + }, + { + "text": "Emma Larkworthy, who lives in Barnstaple, Devon, around ten 25 miles from the epicentre, said: \"My daughter Nadine and I felt tremors and the house shudder about 1.", + "length": 164 + }, + { + "text": "Movement: Data from a survey station in 
North Devon showing a large seismic activity as people in the area reported feeling the ground shake for around 10 seconds .", + "length": 164 + }, + { + "text": "'It was quite scary hearing what we thought was a loud bang then all the furniture shook then hearing like a rumble ratting and we couldn't fathom out where it was coming from.", + "length": 176 + }, + { + "text": "That was the largest to hit Britain since 1984 and  was felt across areas including Merseyside, Birmingham, Leicestershire, Bedfordshire, Northampton, Norfolk, Surrey and Greater London.", + "length": 187 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6294055432081223 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:02.742637932Z", + "first_section_created": "2025-12-23T09:36:02.742953045Z", + "last_section_published": "2025-12-23T09:36:02.743390462Z", + "all_results_received": "2025-12-23T09:36:02.839958091Z", + "output_generated": "2025-12-23T09:36:02.840141798Z", + "total_processing_time_ms": 97, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 96, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:02.742953045Z", + "publish_time": "2025-12-23T09:36:02.743252856Z", + "first_worker_start": "2025-12-23T09:36:02.743673173Z", + "last_worker_end": "2025-12-23T09:36:02.839021Z", + "total_journey_time_ms": 96, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:02.743889582Z", + "start_time": "2025-12-23T09:36:02.743986085Z", + "end_time": "2025-12-23T09:36:02.74411029Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:02.744057Z", + "start_time": "2025-12-23T09:36:02.744192Z", + "end_time": "2025-12-23T09:36:02.839021Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 94 + }, + "topn": { + "worker_type": 
"topn", + "receive_time": "2025-12-23T09:36:02.74358717Z", + "start_time": "2025-12-23T09:36:02.743673173Z", + "end_time": "2025-12-23T09:36:02.743763977Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:02.743698374Z", + "start_time": "2025-12-23T09:36:02.743784778Z", + "end_time": "2025-12-23T09:36:02.74384538Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:02.743316559Z", + "publish_time": "2025-12-23T09:36:02.743390462Z", + "first_worker_start": "2025-12-23T09:36:02.743885882Z", + "last_worker_end": "2025-12-23T09:36:02.815537Z", + "total_journey_time_ms": 72, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:02.743912683Z", + "start_time": "2025-12-23T09:36:02.743964585Z", + "end_time": "2025-12-23T09:36:02.743993986Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:02.744046Z", + "start_time": "2025-12-23T09:36:02.744188Z", + "end_time": "2025-12-23T09:36:02.815537Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 71 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:02.74384708Z", + "start_time": "2025-12-23T09:36:02.743885882Z", + "end_time": "2025-12-23T09:36:02.743913983Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:02.743876581Z", + "start_time": "2025-12-23T09:36:02.743919583Z", + "end_time": "2025-12-23T09:36:02.743931083Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 165, + "min_processing_ms": 71, + "max_processing_ms": 94, + "avg_processing_ms": 82, + "median_processing_ms": 94, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3002, + "slowest_section_id": 0, + "slowest_section_time_ms": 96 + } +} diff --git a/data/output/006bca7d18b3c6889ce567133566d22e491d27c1.json b/data/output/006bca7d18b3c6889ce567133566d22e491d27c1.json new file mode 100644 index 0000000..d7e85e6 --- /dev/null +++ b/data/output/006bca7d18b3c6889ce567133566d22e491d27c1.json @@ -0,0 +1,294 @@ +{ + "file_name": "006bca7d18b3c6889ce567133566d22e491d27c1.txt", + "total_words": 719, + "top_n_words": [ + { + "word": "the", + "count": 43 + }, + { + "word": "in", + "count": 26 + }, + { + "word": "of", + "count": 24 + }, + { + "word": "his", + "count": 23 + }, + { + "word": "and", + "count": 22 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "he", + "count": 16 + }, + { + "word": "chinese", + "count": 11 + }, + { + "word": "zao", + "count": 11 + }, + { + "word": "that", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "\" \"Yes,\" I answered.", + "length": 20 + }, + { + "text": "\"It's hard to translate between them.", + "length": 37 + }, + { + "text": "Painting must express these feelings.", + 
"length": 37 + }, + { + "text": "\"He represents himself, and that is enough.", + "length": 43 + }, + { + "text": "Zao worked hard to find his artistic voice.", + "length": 43 + }, + { + "text": "\" An artist friend once asked about my research.", + "length": 48 + }, + { + "text": "Faced with this challenge, \"It's easier to learn English!", + "length": 57 + }, + { + "text": "Sometimes you must wear yourself out trying to understand.", + "length": 58 + }, + { + "text": "Hearing that I studied Zao Wou-ki, he grew suddenly pensive.", + "length": 60 + }, + { + "text": "\"French thought and Chinese thought are not the same,\" he told me.", + "length": 66 + }, + { + "text": "For Zao and his contemporaries, Paris represented the source of modern art.", + "length": 75 + }, + { + "text": "He wished to be appreciated on his own merits and not to fall victim to stereotype.", + "length": 83 + }, + { + "text": "\"Zao Wou-ki,\" he mused, \"his work isn't representative of either Chinese or French art.", + "length": 87 + }, + { + "text": "Instead, he titled his works with their date of completion, marking their entry into the world.", + "length": 95 + }, + { + "text": "In just a few years, he established himself as an integral member of the postwar French art world.", + "length": 98 + }, + { + "text": "Zao's given name, \"Wou-ki\" (or \"Wuji\" in the standard Hanyu Pinyin romanization used in China), means \"no boundaries.", + "length": 117 + }, + { + "text": "In the decades that followed, Zao committed himself fully to abstract painting, rarely using even figurative titles after 1959.", + "length": 127 + }, + { + "text": "(CNN) -- The Chinese-French painter Zao Wou-ki once told me that painting expresses the thoughts we struggle to put into words.", + "length": 127 + }, + { + "text": "\" he joked, his wit shining through, even though Alzheimer's disease had already begun its slow, relentless onslaught on his mind.", + "length": 130 + }, + { + "text": "Born in Beijing in 1920, 
he formed part of the second generation of Chinese artists to turn westward in their search for inspiration.", + "length": 133 + }, + { + "text": "Although he did not know it at the time, the move would be permanent, due in part to the rapidly changing political situation in China.", + "length": 135 + }, + { + "text": "Living there meant direct access to the paintings that he had until then only encountered as black-and-white reproductions in art magazines.", + "length": 140 + }, + { + "text": "\" No single phrase better encapsulates the union in his person and art of the two often disparate cultures and aesthetic visions of France and China.", + "length": 149 + }, + { + "text": "An oil painter by vocation, he immersed himself in the riches that surrounded him -- heading directly to the Louvre on the very day he arrived in the city.", + "length": 155 + }, + { + "text": "Zao, widely regarded as one of the foremost Chinese contemporary painters of the 20th century, passed away at his home in Switzerland on Tuesday at the age of 93.", + "length": 162 + }, + { + "text": "Apart from brief trips abroad, Zao would remain in France until the year before his death, one of the few Chinese artists from his generation to emigrate to Europe.", + "length": 164 + }, + { + "text": "Embraced by France, he was elected to the prestigious Academie des Beaux Arts society in 2002 and received the Legion of Honor in 2006 from then-president Jacques Chirac.", + "length": 170 + }, + { + "text": "In Zao's case, this phenomenon is perhaps best understood as a self-statement: the artist's insistence on his personal and aesthetic identity in the face of the vagaries of borders and time.", + "length": 190 + }, + { + "text": "Encouraged by the French-educated Chinese artist Lin Fengmian, his teacher at the prestigious Hangzhou National College of Art (today the China Academy of Art), he relocated to Paris in 1948.", + "length": 191 + }, + { + "text": "At first he made a determined effort to 
distance himself from ink painting -- the medium most closely associated with the Chinese painting tradition-- and subject matter that might be construed as overtly Chinese.", + "length": 213 + }, + { + "text": "The apparent disorder of his paintings concealed an underlying structure, sometimes described as Daoist in nature, which bore striking parallels to a similar balance between order and chaos found in Chinese traditional painting.", + "length": 228 + }, + { + "text": "Meanwhile, with the assistance of his friend and mentor, noted poet and painter Henri Michaux, and blessed with the warm charm and wit that would impress me decades later, Zao cultivated an extensive circle of fellow artists and cultural figures.", + "length": 246 + }, + { + "text": "His breakthrough, however, came with his 1954 masterpiece \"Wind,\" a painting that was both his first purely abstract work and a return to his origins: the inky black forms rising in two wavering columns are abstractions of oracle bone characters -- the most ancient of Chinese scripts.", + "length": 285 + }, + { + "text": "The lyrical qualities that defined him as an artist appeared early on, first in his oil paintings and later in his ink paintings, after his reengagement with the medium in the early 1970s: oscillating planes of color, light, and shade met, collided, and diverged, skidding across the surface of his works.", + "length": 305 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.453817218542099 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:03.244157974Z", + "first_section_created": "2025-12-23T09:36:03.245391024Z", + "last_section_published": "2025-12-23T09:36:03.245662635Z", + "all_results_received": "2025-12-23T09:36:03.311694295Z", + "output_generated": "2025-12-23T09:36:03.311857302Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": 
[ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:03.245391024Z", + "publish_time": "2025-12-23T09:36:03.245662635Z", + "first_worker_start": "2025-12-23T09:36:03.246195257Z", + "last_worker_end": "2025-12-23T09:36:03.310866Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:03.246185156Z", + "start_time": "2025-12-23T09:36:03.246262859Z", + "end_time": "2025-12-23T09:36:03.246397865Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:03.246404Z", + "start_time": "2025-12-23T09:36:03.246557Z", + "end_time": "2025-12-23T09:36:03.310866Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:03.246206557Z", + "start_time": "2025-12-23T09:36:03.24628516Z", + "end_time": "2025-12-23T09:36:03.246386064Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:03.246120354Z", + "start_time": "2025-12-23T09:36:03.246195257Z", + "end_time": "2025-12-23T09:36:03.246233958Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + 
"max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4304, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/006d24c5e3a873f541e5351e7719d2c131d8e2d3.json b/data/output/006d24c5e3a873f541e5351e7719d2c131d8e2d3.json new file mode 100644 index 0000000..5b3eb32 --- /dev/null +++ b/data/output/006d24c5e3a873f541e5351e7719d2c131d8e2d3.json @@ -0,0 +1,230 @@ +{ + "file_name": "006d24c5e3a873f541e5351e7719d2c131d8e2d3.txt", + "total_words": 300, + "top_n_words": [ + { + "word": "the", + "count": 13 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "we", + "count": 9 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "wenger", + "count": 6 + }, + { + "word": "have", + "count": 5 + }, + { + "word": "qpr", + "count": 5 + }, + { + "word": "are", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "Again, it's down to our performances.", + "length": 37 + }, + { + "text": "'We expect a QPR with nothing to lose.", + "length": 38 + }, + { + "text": "West Ham at the moment are in very good shape.", + "length": 46 + }, + { + "text": "They will come and have a go at us,' Wenger said.", + "length": 49 + }, + { + "text": "'We go away to Southampton, which is a very difficult as well.", + "length": 62 + }, + { + "text": "'We played QPR two years ago and they were very difficult games.", + "length": 64 + }, + { + "text": "In the Premier League every game is difficult and you just want to turn up.", + "length": 75 + }, + { + "text": "'You focus on your own 
performance and don't expect any weakness from your opponent.", + "length": 84 + }, + { + "text": "'Every team has difficult games, so it is the consistency in that period that will matter.", + "length": 90 + }, + { + "text": "Charlie Austin, who scored a hat-trick at the weekend, will be QPR's main threat on Boxing Day .", + "length": 96 + }, + { + "text": "'We have a good opportunity over Christmas to show that we are much better than people think we are.", + "length": 100 + }, + { + "text": "Arsenal conceded late against 10-man Liverpool last weekend to leave them sitting sixth in the table .", + "length": 102 + }, + { + "text": "The return of Olivier Giroud in recent games has aided Wenger's striking options alongside Alexis Sanchez .", + "length": 107 + }, + { + "text": "Under-pressure manager Arsene Wenger hopes his side can show their true potential in the next three games .", + "length": 107 + }, + { + "text": "'We have to make sure that over Christmas we take advantage of the schedule we have and put the performances in,' Wenger said.", + "length": 126 + }, + { + "text": "' QPR have yet to register an away point this season, but Wenger refuses to take victory at the Emirates Stadium on Friday for granted.", + "length": 135 + }, + { + "text": "Arsene Wenger has urged his Arsenal team to use their Christmas fixtures to prove they are in a false position in the Barclays Premier League.", + "length": 142 + }, + { + "text": "The sixth-placed Gunners face QPR on Boxing Day and then travel to West Ham and Southampton, clubs who sit directly above them in the table and rivals for Champions League qualification.", + "length": 186 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5323813557624817 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:03.746489512Z", + "first_section_created": "2025-12-23T09:36:03.74695333Z", + "last_section_published": "2025-12-23T09:36:03.747166039Z", + "all_results_received": 
"2025-12-23T09:36:03.805499089Z", + "output_generated": "2025-12-23T09:36:03.805652395Z", + "total_processing_time_ms": 59, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:03.74695333Z", + "publish_time": "2025-12-23T09:36:03.747166039Z", + "first_worker_start": "2025-12-23T09:36:03.74769566Z", + "last_worker_end": "2025-12-23T09:36:03.804623Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:03.747743562Z", + "start_time": "2025-12-23T09:36:03.747807765Z", + "end_time": "2025-12-23T09:36:03.747833366Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:03.748022Z", + "start_time": "2025-12-23T09:36:03.74815Z", + "end_time": "2025-12-23T09:36:03.804623Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:03.747704861Z", + "start_time": "2025-12-23T09:36:03.747752662Z", + "end_time": "2025-12-23T09:36:03.747794264Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:03.747629158Z", + "start_time": "2025-12-23T09:36:03.74769566Z", + "end_time": "2025-12-23T09:36:03.747710461Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + 
"avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1663, + "slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/006d866e55170e8c1b9f832da38579376a7e5df3.json b/data/output/006d866e55170e8c1b9f832da38579376a7e5df3.json new file mode 100644 index 0000000..b8c1001 --- /dev/null +++ b/data/output/006d866e55170e8c1b9f832da38579376a7e5df3.json @@ -0,0 +1,282 @@ +{ + "file_name": "006d866e55170e8c1b9f832da38579376a7e5df3.txt", + "total_words": 600, + "top_n_words": [ + { + "word": "the", + "count": 25 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "s", + "count": 13 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "with", + "count": 10 + }, + { + "word": "fit", + "count": 9 + }, + { + "word": "workout", + "count": 9 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "up", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "MIC: FIT .", + "length": 10 + }, + { + "text": "Of course you do!", + "length": 17 + }, + { + "text": "Struggling, Spenny?", + "length": 19 + }, + { + "text": "Pre-order your £19.", + "length": 20 + }, + { + "text": "Want bodies like ours?", + "length": 22 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Britain's poshest workout?", + "length": 26 + }, + { + "text": "Clearly he abs-solutely 
loves it.", + "length": 33 + }, + { + "text": "99 copy now and join the conversation at #MICFIT .", + "length": 50 + }, + { + "text": "Now you can, thanks to Made In Chelsea's official fitness DVD.", + "length": 62 + }, + { + "text": "MIC:FIT is released on DVD and Digital HD through Universal Pictures on 1st December.", + "length": 85 + }, + { + "text": "Former flames Lucy and Spencer get sweaty in the DVD, which lands in stores next month .", + "length": 88 + }, + { + "text": "Want to emulate Spencer Matthews' rippling abs or achieve a stomach as toned as Lucy Watson's?", + "length": 94 + }, + { + "text": "Fit Felstead: Binky shares her favourite Tabata Wake Up Call Workout - a four-minute intense workout .", + "length": 102 + }, + { + "text": "It not only works on your core but also helps with your recovery from the more strenuous routines, he assures.", + "length": 110 + }, + { + "text": "Proudlock and Binky have created workout with personal trainers they think will keep you fit whilst having fun .", + "length": 112 + }, + { + "text": "Press up Proudlock: Ollie introduces a special yoga and Pilates-inspired workout called the Chelsea Morning Power Stretch .", + "length": 124 + }, + { + "text": "And then there's hipster Proudlock, who introduces a special yoga and Pilates-inspired workout called the Chelsea Morning Power Stretch.", + "length": 136 + }, + { + "text": "It focuses on getting all the right bits toned before you put on that bikini or beachwear - something the actress-turned-model knows all about.", + "length": 143 + }, + { + "text": "This routine is designed to work on the abs, arms and legs by alternating periods of short, intense anaerobic exercise with less intense recovery periods.", + "length": 154 + }, + { + "text": "Get the Chelsea look: The foursome are hoping to help kick-start your fitness regime ready for the Christmas party season and New Year with their new DVD .", + "length": 155 + }, + { + "text": "Meanwhile, Spencer pretends he's not 
in pain whilst working up an energising sweat with a 20-minute High Intensity Interval Training routine, also known as HIIT .", + "length": 162 + }, + { + "text": "Meanwhile, Spencer pretends he’s not in pain whilst working up an energising sweat with a 20-minute High Intensity Interval Training routine, also known as HIIT.", + "length": 163 + }, + { + "text": "Despite Lucy’s claims she’s biologically incapable of sweating, she proves otherwise with her Beach Body Workout that uses just bodyweight without any equipment.", + "length": 165 + }, + { + "text": "Fit in Chelsea: Binky Felstead, Spencer Matthews, Ollie Proudlock and Lucy Watson have joined forces with the UK's top trainers to create their very own fitness DVD.", + "length": 165 + }, + { + "text": "Ollie Proudlock, Binky Felstead, Spencer Matthews and Lucy Watson are hoping to help kick-start your fitness regime ready for the Christmas party season and New Year.", + "length": 166 + }, + { + "text": "First up is Binky Felstead, who shares her favourite Tabata Wake Up Call Workout - a four-minute intense workout she promises is super fun whilst burning lots of calories.", + "length": 171 + }, + { + "text": "The DVD, which lands on December 1, promises to reveal the cast members' secrets to 'looking hot with a hectic lifestyle and how they keep fit when not partying in Chelsea'.", + "length": 173 + }, + { + "text": "The stars of TV's poshest show - and a host of top trainers - have joined forces for the E4 show’s first official fitness programme, MIC:FIT - and MailOnline can exclusively reveal a first look at the trailer and behind the scenes images.", + "length": 240 + }, + { + "text": "The Chelsea clan are currently filming for the latest series, which - quite fittingly - sees Binky dating a personal trainer, Proudlock rocking crucifix earrings, Spencer living up to his lothario status and Lucy getting up in everyone's grill.", + "length": 244 + }, + { + "text": "'Featuring the hottest fitness trends with 
four fun, high-energy 20-minute workouts that will have you looking toned for the King's Road, MIC:FIT is perfect to fit into your busy schedule: it gets results and will help keep you looking and feeling amazing during the festive season and beyond,' reads the description.", + "length": 317 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.49738287925720215 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:04.247929113Z", + "first_section_created": "2025-12-23T09:36:04.248267926Z", + "last_section_published": "2025-12-23T09:36:04.248455534Z", + "all_results_received": "2025-12-23T09:36:04.313262245Z", + "output_generated": "2025-12-23T09:36:04.313411051Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:04.248267926Z", + "publish_time": "2025-12-23T09:36:04.248455534Z", + "first_worker_start": "2025-12-23T09:36:04.249124661Z", + "last_worker_end": "2025-12-23T09:36:04.312451Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:04.249149762Z", + "start_time": "2025-12-23T09:36:04.249208164Z", + "end_time": "2025-12-23T09:36:04.249265567Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:04.24942Z", + "start_time": "2025-12-23T09:36:04.249562Z", + "end_time": "2025-12-23T09:36:04.312451Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:04.249064859Z", + "start_time": "2025-12-23T09:36:04.249124661Z", + "end_time": "2025-12-23T09:36:04.249209364Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": 
"2025-12-23T09:36:04.249088459Z", + "start_time": "2025-12-23T09:36:04.249154762Z", + "end_time": "2025-12-23T09:36:04.249205464Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3581, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/006dd02051a6d228fa3841ab555fced21fccdf1e.json b/data/output/006dd02051a6d228fa3841ab555fced21fccdf1e.json new file mode 100644 index 0000000..3075018 --- /dev/null +++ b/data/output/006dd02051a6d228fa3841ab555fced21fccdf1e.json @@ -0,0 +1,330 @@ +{ + "file_name": "006dd02051a6d228fa3841ab555fced21fccdf1e.txt", + "total_words": 839, + "top_n_words": [ + { + "word": "the", + "count": 41 + }, + { + "word": "in", + "count": 33 + }, + { + "word": "to", + "count": 22 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "he", + "count": 20 + }, + { + "word": "a", + 
"count": 16 + }, + { + "word": "ikrima", + "count": 15 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "that", + "count": 15 + }, + { + "word": "al", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "forces.", + "length": 7 + }, + { + "text": "Al-Shabaab in Norway .", + "length": 22 + }, + { + "text": "He later returned to Somalia.", + "length": 29 + }, + { + "text": "troops came under heavy fire.", + "length": 29 + }, + { + "text": "Recruiting operatives in the West?", + "length": 34 + }, + { + "text": "Inside story on an Al-Shabaab commander .", + "length": 41 + }, + { + "text": "\"That's what we have to watch these days.", + "length": 41 + }, + { + "text": "It's believed that he escaped after the U.", + "length": 42 + }, + { + "text": "officials said Ikrima was the target of a raid earlier this month by U.", + "length": 71 + }, + { + "text": "He said he is confident that it's the same person who was targeted by U.", + "length": 72 + }, + { + "text": "Navy SEALs on an Al-Shabaab compound near the town of Baraawe in Somalia.", + "length": 73 + }, + { + "text": "Norwegian authorities have not yet released the Norwegian citizen's name.", + "length": 73 + }, + { + "text": "In 2008 they heard that he was in Somalia, where he has been based since.", + "length": 73 + }, + { + "text": "Kenyan authorities suspect Ikrima of involvement with the Westgate mall attack.", + "length": 79 + }, + { + "text": "He had failed to gain asylum status but had been given Norwegian travel papers.", + "length": 79 + }, + { + "text": "However, this development brought its own problems, Hansen said, and not just in Norway.", + "length": 88 + }, + { + "text": "Storm, who is Danish, described Ikrima as a Somali-Kenyan Al-Shabaab operative who had spent time in Norway.", + 
"length": 108 + }, + { + "text": "Skjaerstad, who reports on security and terrorism for TV2, said Ikrima had lived there between 2004 and 2008.", + "length": 109 + }, + { + "text": "Al-Shabaab claimed responsibility for the bloody four-day siege at the upscale mall in Nairobi, where at least 67 people died.", + "length": 126 + }, + { + "text": "The sources, who had kept up with him over the years, said Ikrima traveled in 2007 to London, where they lost contact with him.", + "length": 127 + }, + { + "text": "He came to Norway at age 8 or 9 and stayed for a couple of years, during which time he gained Norwegian citizenship, Hansen said.", + "length": 129 + }, + { + "text": "He said that Ikrima made clear to him via e-mail that he was ready to send recruits from the West back home from Somalia to launch attacks.", + "length": 139 + }, + { + "text": "Skjaerstad told CNN that according to his sources, Ikrima had traveled to Somalia while living in Norway and had used about a dozen aliases.", + "length": 140 + }, + { + "text": "Arabic is among the six languages spoken by Ikrima, and he studied French for two years at the Alliance Francais in Nairobi, his friends say.", + "length": 141 + }, + { + "text": "Norwegian journalist Bent Skjaerstad told CNN his sources have confirmed that Ikrima had indeed spent time in Norway and had tried to recruit for Al-Shabaab in Europe.", + "length": 167 + }, + { + "text": "Friends of Ikrima who knew him from his time growing up on the Nairobi suburb of Eastleigh told CNN he traveled to Norway in 2003 and grew increasingly radicalized there.", + "length": 170 + }, + { + "text": "Norwegian intelligence services are in Kenya investigating Ikrima and the Norwegian citizen, the Kenyan sources said, and have also spoken to the latter's sister in Norway.", + "length": 172 + }, + { + "text": "The possible involvement of the Norwegian citizen in the Westgate mall attack has highlighted concerns about the widening reach of the Al-Shabaab group 
outside Somali borders.", + "length": 175 + }, + { + "text": "Kenyan counterterrorism sources are looking at a Norwegian citizen of Somali descent as a possible suspect in the Westgate mall attack last month, the sources told CNN on Friday.", + "length": 178 + }, + { + "text": "Al-Shabaab became quite popular among some Somali community groups in Norway from 2007 to 2009, Hansen said, \"because they were wrongly seen as some kind of national resistance group.", + "length": 183 + }, + { + "text": "The Norwegian citizen is believed to have ties to Mohamed Abdikadir Mohamed, known as Ikrima, who is regarded as one of the most dangerous commanders in the Somali terror group Al-Shabaab.", + "length": 188 + }, + { + "text": "\" Observers noticed contradictions between what the group said in its English- and Arabic-language messaging, he said, which contributed to ignorance within the diaspora about its real nature.", + "length": 192 + }, + { + "text": "Morten Storm, a former informant who has worked for several Western intelligence agencies, has told CNN that he developed a close relationship with an Al-Shabaab figure called Ikrima between 2008 and 2012.", + "length": 205 + }, + { + "text": "\"But the terrorist attacks inside of Somalia made it easier for the wider ethnic Somali community to see that this was really a terrorist organization, and it distanced itself,\" he said, making it less popular now.", + "length": 214 + }, + { + "text": "Kenyan intelligence sources say that Ikrima, who speaks six languages and grew up in Kenya, is the main \"point person\" between al Qaeda in Somalia and al Qaeda in the Arabian Peninsula, and that he has helped pinpoint Kenyan targets.", + "length": 233 + }, + { + "text": "Stig Hansen, a security expert based in Norway and author of the book \"Al-Shabaab in Somalia,\" told CNN that if the Norwegian suspect is who he believes him to be, he lived in a small town in Norway but had connections with a wider group, not all of Somali 
origin.", + "length": 264 + }, + { + "text": "A Kenyan intelligence dossier seen by CNN alleges Ikrima's involvement with Briton Samantha Lewthwaite, a terror suspect known as the \"White Widow,\" in a foiled Mombasa attack in 2011 with Jermaine Grant, a fellow British citizen currently held in Mombasa on terror charges.", + "length": 274 + }, + { + "text": "\"What you have to look out for, also in the United States and the United Kingdom and all these other Scandinavian countries, are these small, small networks that are in one sense detached also from the Somali community leaders -- radicalized groups of youths and radical preachers, sheikhs, that go traveling around the various countries to try to incite,\" he said.", + "length": 365 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.774372398853302 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:04.749223408Z", + "first_section_created": "2025-12-23T09:36:04.749591323Z", + "last_section_published": "2025-12-23T09:36:04.749831633Z", + "all_results_received": "2025-12-23T09:36:04.806394411Z", + "output_generated": "2025-12-23T09:36:04.806574019Z", + "total_processing_time_ms": 57, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 56, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:04.749591323Z", + "publish_time": "2025-12-23T09:36:04.749831633Z", + "first_worker_start": "2025-12-23T09:36:04.750365054Z", + "last_worker_end": "2025-12-23T09:36:04.805532Z", + "total_journey_time_ms": 55, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:04.750391655Z", + "start_time": "2025-12-23T09:36:04.750470458Z", + "end_time": "2025-12-23T09:36:04.750577663Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:04.750648Z", + "start_time": 
"2025-12-23T09:36:04.750804Z", + "end_time": "2025-12-23T09:36:04.805532Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 54 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:04.750287651Z", + "start_time": "2025-12-23T09:36:04.750365054Z", + "end_time": "2025-12-23T09:36:04.750452058Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:04.750404456Z", + "start_time": "2025-12-23T09:36:04.750475859Z", + "end_time": "2025-12-23T09:36:04.750539161Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 54, + "min_processing_ms": 54, + "max_processing_ms": 54, + "avg_processing_ms": 54, + "median_processing_ms": 54, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4900, + "slowest_section_id": 0, + "slowest_section_time_ms": 55 + } +} diff --git a/data/output/006dd19dd114a34e2b383700442c0ef50046422f.json b/data/output/006dd19dd114a34e2b383700442c0ef50046422f.json new file mode 100644 
index 0000000..1540d1d --- /dev/null +++ b/data/output/006dd19dd114a34e2b383700442c0ef50046422f.json @@ -0,0 +1,310 @@ +{ + "file_name": "006dd19dd114a34e2b383700442c0ef50046422f.txt", + "total_words": 604, + "top_n_words": [ + { + "word": "the", + "count": 30 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "she", + "count": 14 + }, + { + "word": "her", + "count": 13 + }, + { + "word": "ruby", + "count": 10 + }, + { + "word": "at", + "count": 8 + }, + { + "word": "berlusconi", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "‘ .", + "length": 5 + }, + { + "text": "What a shame.", + "length": 13 + }, + { + "text": "Hannah Roberts In Rome .", + "length": 24 + }, + { + "text": "Both deny sexual contact.", + "length": 25 + }, + { + "text": "She brought it all on herself.", + "length": 30 + }, + { + "text": "Miss Guerra tweeted: ‘Fxxx off!", + "length": 33 + }, + { + "text": "Go back to Morocco you big whore.", + "length": 33 + }, + { + "text": "She did half of Milan in six months.", + "length": 36 + }, + { + "text": "Maybe she’s afraid of being lynched.", + "length": 38 + }, + { + "text": "At least have the respect not to go inside.", + "length": 43 + }, + { + "text": "She has had more men than there are Picasso paintings.", + "length": 54 + }, + { + "text": "Both women gave evidence for the defence at the Ruby sex trial in 2011.", + "length": 71 + }, + { + "text": "She ruined all our lives and that of the president [sic] just to get famous.", + "length": 76 + }, + { + "text": "Miss Guerra responded: ‘Why doesn’t she return home to whore herself out.", + "length": 77 + }, + { + "text": "Former Italian premier Silvio Berlusconi is on trial for sex with Ruby when she was just 17.", + "length": 92 + }, + { + "text": "Exotic dancer: Bunga Bunga party girl Ruby the Heartstealer claims she was not a 
prostitute .", + "length": 93 + }, + { + "text": "While the age of consent is 14 in Italy, it is illegal to have sex with a prostitute who is under 18.", + "length": 101 + }, + { + "text": "But Miss Guerra ridiculed the remark tweeting: ‘She has become Catholic and went to church at Easter!", + "length": 103 + }, + { + "text": "His representatives argued that the Milan judges are politically prejudiced against the former premier.", + "length": 103 + }, + { + "text": "’ Former Italian president Silvio Berlusconi who denies have sex with Bunga Bunga girl Ruby the Heartstealer .", + "length": 112 + }, + { + "text": "Party girl: Austrian entrepreneur Richard Lugner dancing with Ruby at the traditional Vienna Opera Ball in 2011 .", + "length": 113 + }, + { + "text": "Each lived, at the time, in their own luxury apartment owned in Milan, courtesy of the billionaire media magnate.", + "length": 113 + }, + { + "text": "Ruby, real name Karima el Mahroug, broke her two year silence on Thursday to claim she had never had sex for money.", + "length": 115 + }, + { + "text": "In a tearful protest on the steps of an Italian court, Ruby the Heartstealer claimed she had never been a prostitute.", + "length": 117 + }, + { + "text": "The Ruby sex trial has continued for more than year, subject to interminable delays by Berlusconi’s defence lawyers.", + "length": 118 + }, + { + "text": "Her one-time comrades have rubbished her claims, labelling her a ‘whore’ who ‘did half of Milan in six months’.", + "length": 119 + }, + { + "text": "The decision is expected as Berlusconi battles to secure a place for his centre right party in a new Italian government .", + "length": 121 + }, + { + "text": "But two of her former associates watching on TV, immediately took to Twitter to castigate her for ‘ruining all our lives’.", + "length": 126 + }, + { + "text": "But, the exotic dancer at the centre of the Berlusconi 'bunga bunga' party scandal, doesn’t seem to have everyone convinced.", + "length": 
126 + }, + { + "text": "Miss Visan revealed in court that she herself was paid almost  £2,000 a month by Berlusconi ‘towards her university studies’.", + "length": 131 + }, + { + "text": "’ Another of the bunga bunga girls, Romanian escort Ioana Visan, said: ‘You can’t throw stones and then hide your hands behind your back.", + "length": 143 + }, + { + "text": "In an extraordinarily venomous attack, TV showgirl Barbara Guerra labelled Ruby a ‘hypocrite’, who had had more lovers ‘than a porn star’.", + "length": 146 + }, + { + "text": "’ Karima el-Mahroug took the witness stand in the trial of three former Berlusconi aides charged with recruiting her and other women for prostitution .", + "length": 153 + }, + { + "text": "In the course of her dramatic statement on Thursday, Ruby claimed that she had been abused by a stranger at church, an experience that prompted her emotional demonstration.", + "length": 172 + }, + { + "text": "’ The two women seemed not to care who was party to their Twitter conversation, at one point retweeting a follower who asked: ‘Do you know that we are all reading all this?", + "length": 176 + }, + { + "text": "The former nightclub performer, who has since the scandal broke in 2011 had a baby daughter, Sofia, one, said she now wanted to clear her name, so her child ‘could be proud of her’.", + "length": 185 + }, + { + "text": "The verdict was expected last month but proceedings are currently on hold pending a Supreme Court decision, after Berlusconi’s lawyers demanded the trial be moved to the town of Brescia.", + "length": 188 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6684573888778687 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:05.25092952Z", + "first_section_created": "2025-12-23T09:36:05.251209631Z", + "last_section_published": "2025-12-23T09:36:05.251386438Z", + "all_results_received": "2025-12-23T09:36:05.318908359Z", + "output_generated": 
"2025-12-23T09:36:05.319076965Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:05.251209631Z", + "publish_time": "2025-12-23T09:36:05.251386438Z", + "first_worker_start": "2025-12-23T09:36:05.251902159Z", + "last_worker_end": "2025-12-23T09:36:05.317968Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:05.251897959Z", + "start_time": "2025-12-23T09:36:05.251960762Z", + "end_time": "2025-12-23T09:36:05.252040965Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:05.252128Z", + "start_time": "2025-12-23T09:36:05.252265Z", + "end_time": "2025-12-23T09:36:05.317968Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:05.251832056Z", + "start_time": "2025-12-23T09:36:05.25191676Z", + "end_time": "2025-12-23T09:36:05.251995263Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:05.251810955Z", + "start_time": "2025-12-23T09:36:05.251902159Z", + "end_time": "2025-12-23T09:36:05.25193056Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, 
+ "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3561, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/006dd31c76a6cbdf235b094f8081c8b4d3d82350.json b/data/output/006dd31c76a6cbdf235b094f8081c8b4d3d82350.json new file mode 100644 index 0000000..afea0d5 --- /dev/null +++ b/data/output/006dd31c76a6cbdf235b094f8081c8b4d3d82350.json @@ -0,0 +1,302 @@ +{ + "file_name": "006dd31c76a6cbdf235b094f8081c8b4d3d82350.txt", + "total_words": 745, + "top_n_words": [ + { + "word": "the", + "count": 49 + }, + { + "word": "a", + "count": 26 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "to", + "count": 22 + }, + { + "word": "in", + "count": 19 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "tin", + "count": 13 + }, + { + "word": "at", + "count": 11 + }, + { + "word": "is", + "count": 10 + }, + { + "word": "on", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "At 5.", + "length": 5 + }, + { + "text": "' At 11.", + "length": 8 + }, + { + "text": "Minutes later at 6.", + "length": 19 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "One of the men frequently appears in a 'deerstalker hat.", + "length": 56 + }, + { + "text": "He is often seen wearing a distinctive-checked deerstalker hat.", + "length": 63 + }, + { + "text": "'The first man is described as white, 6ft, of a slim build and in his 20s.", + 
"length": 74 + }, + { + "text": "A worker in One Stop (pictured) grabbed back one of the tins from the thief .", + "length": 77 + }, + { + "text": "But he was spotted by a worker who grabbed the tin and the man fled the store.", + "length": 78 + }, + { + "text": "'This is a wicked and deplorable crime and we need the public to help us catch those responsible.", + "length": 97 + }, + { + "text": "They were then seen getting into a green coloured car - possibly a Ford Focus - and speeding off.", + "length": 97 + }, + { + "text": "'The other man is described as black, around 5ft 11ins, of a medium build and is also in his 20s.", + "length": 97 + }, + { + "text": "Just 30 minutes later, at around 7pm, another tin was stolen from Threshers on Widney Road also in Knowle.", + "length": 106 + }, + { + "text": "The men are captured on CCTV casually placing the poppy tins into their coats and bags to steal donations .", + "length": 107 + }, + { + "text": "But he is foiled after the worker returns and spots him, removing the poppy collection tin hidden in his bag .", + "length": 110 + }, + { + "text": "The man appears to have got away with the theft, putting the scissors and collection tin in his bag at the store .", + "length": 114 + }, + { + "text": "One of the thieves is caught on CCTV cutting a string attaching a poppy collection tin to the counter at One Stop .", + "length": 115 + }, + { + "text": "The man is seen trying to put the tin into his bag - the fourth attempt he had made to steal a poppy tin that day .", + "length": 115 + }, + { + "text": "35am last Monday the men walked into Meriden Tea Rooms on Fillongley Road, Solihull and pretended to wait to be seated.", + "length": 119 + }, + { + "text": "' Anyone with information about the identity of the men is urged to call police on 101 or Crimestoppers on 0800 555 111.", + "length": 120 + }, + { + "text": "Security cameras showed one of them take a poppy tin on the counter and calmly place it under his jacket before 
walking out.", + "length": 124 + }, + { + "text": "Their actions have been denounced as a 'wicked and deplorable crime', taking money that had been donated to help war veterans.", + "length": 126 + }, + { + "text": "Officers also believe the pair targeted Lloyds Pharmacy in Meriden, as well as the butchers and dry cleaners in Balsall Common.", + "length": 127 + }, + { + "text": "Inspector Allan Green, who is investigating the thefts, said: 'There are no circumstances under which such thefts are justifiable.", + "length": 130 + }, + { + "text": "The next day, a man walked into Murco petrol station in Knowle, at 2pm and again stuffed a tin under his jacket before walking out.", + "length": 131 + }, + { + "text": "30pm, one of the men went into One Stop in Knowle, and tried to steal a poppy tin by cutting the string attaching it to the counter.", + "length": 132 + }, + { + "text": "Then at around 2pm on the same day, a poppy tin was stolen from Balsall Common Pharmacy after a man walked in and placed it in a plastic bag.", + "length": 141 + }, + { + "text": "Police have released CCTV of two men they are trying to find in connection with a string of thefts of poppy collection tins in the West Midlands.", + "length": 145 + }, + { + "text": "'From our CCTV inquiries, it appears one man usually removes the tin while the other acts as a distraction to anyone who may try and disturb him.", + "length": 145 + }, + { + "text": "Two thieves have been caught on camera stealing poppy tins as part of a three day spree to raid the charity collections just before Remembrance Sunday.", + "length": 151 + }, + { + "text": "45pm the thieves attempted to steal an appeal tin from Blooms Garden Centre in nearby Hampton-in-Arden, but were unsuccessful because it was chained to the counter.", + "length": 164 + }, + { + "text": "Police are looking for the two men who struck a tea room, pharmacy and three shops in Solihull within a few hours - with some of the thefts taking place within 
minutes of each other.", + "length": 183 + }, + { + "text": "'We are doing all we can to track down the pair but in the meantime, we would urge any businesses with poppy tins to be vigilant and report any thefts to us at the earliest opportunity.", + "length": 185 + }, + { + "text": "The string of thefts have taken place days before Remembrance Sunday as preparations are made at Westminster Abbey with red poppies on crosses being planted at the Field of Remembrance .", + "length": 186 + }, + { + "text": "West Midlands Police has released CCTV footage of the pair as they struck shops and a garden centre over three days to repeatedly swipe collection tins, and are now trying to trace them.", + "length": 186 + }, + { + "text": "'This money was intended for service men and women, veterans and their families - people who have risked their lives for our country, and the loved ones of those who gave their lives doing so.", + "length": 192 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8374722003936768 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:05.752145512Z", + "first_section_created": "2025-12-23T09:36:05.753619872Z", + "last_section_published": "2025-12-23T09:36:05.753853881Z", + "all_results_received": "2025-12-23T09:36:05.816569208Z", + "output_generated": "2025-12-23T09:36:05.816732514Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:05.753619872Z", + "publish_time": "2025-12-23T09:36:05.753853881Z", + "first_worker_start": "2025-12-23T09:36:05.754412303Z", + "last_worker_end": "2025-12-23T09:36:05.815631Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:05.754377602Z", + "start_time": "2025-12-23T09:36:05.754442105Z", + "end_time": 
"2025-12-23T09:36:05.754502007Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:05.754617Z", + "start_time": "2025-12-23T09:36:05.754769Z", + "end_time": "2025-12-23T09:36:05.815631Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:05.754343401Z", + "start_time": "2025-12-23T09:36:05.754412303Z", + "end_time": "2025-12-23T09:36:05.754494507Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:05.754384502Z", + "start_time": "2025-12-23T09:36:05.754449405Z", + "end_time": "2025-12-23T09:36:05.754485506Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4078, + 
"slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/006ddd076db9a0dd7c853f48cc564cf007e1b9e7.json b/data/output/006ddd076db9a0dd7c853f48cc564cf007e1b9e7.json new file mode 100644 index 0000000..d3984fc --- /dev/null +++ b/data/output/006ddd076db9a0dd7c853f48cc564cf007e1b9e7.json @@ -0,0 +1,420 @@ +{ + "file_name": "006ddd076db9a0dd7c853f48cc564cf007e1b9e7.txt", + "total_words": 875, + "top_n_words": [ + { + "word": "the", + "count": 48 + }, + { + "word": "in", + "count": 27 + }, + { + "word": "of", + "count": 24 + }, + { + "word": "to", + "count": 24 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "and", + "count": 14 + }, + { + "word": "mr", + "count": 13 + }, + { + "word": "cameron", + "count": 12 + }, + { + "word": "s", + "count": 12 + }, + { + "word": "that", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Some 5.", + "length": 7 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "7 per cent.", + "length": 11 + }, + { + "text": "the budget.", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "1billion EU rebate.", + "length": 19 + }, + { + "text": "11:50 EST, 27 June 2013 .", + "length": 25 + }, + { + "text": "03:19 EST, 28 June 2013 .", + "length": 25 + }, + { + "text": "spending cut in its history.", + "length": 28 + }, + { + "text": "Mr Cameron’s deal will save UK .", + "length": 34 + }, + { + "text": "Diplomats hailed it as the biggest .", + "length": 36 + }, + { + "text": "We are confident this is resolved now.", + "length": 38 + }, + { + "text": "The Prime Minister celebrated a major .", + "length": 39 + }, + { + "text": "Tamara Cohen In Brussels and Daily Mail Reporter .", + "length": 50 + }, + { + "text": "Only in the EU would it increase at a time of belt-tightening.", + "length": 62 + }, + { + "text": "They will vote on the £30billion cut to £770billion next 
week.", + "length": 64 + }, + { + "text": "Mr Cameron said details should now be ‘locked down’ in EU law.", + "length": 66 + }, + { + "text": "And spending on the pensions of Brussels bureaucrats will rise by 7.", + "length": 68 + }, + { + "text": "by Margaret Thatcher at the Fontainebleau summit almost 30 years ago.", + "length": 69 + }, + { + "text": "diplomatic coup in Brussels in February after securing the first EU .", + "length": 69 + }, + { + "text": "taxpayers hundreds of millions of pounds on what  they would have been .", + "length": 73 + }, + { + "text": "British triumph for the UK in an EU negotiation since the rebate was won .", + "length": 74 + }, + { + "text": "forced to pay under the European Commission’s initial plans to increase .", + "length": 75 + }, + { + "text": "'Administration is normally the first thing to be cut in any government’s budget.", + "length": 83 + }, + { + "text": "David Cameron scored a vital victory for taxpayers last night as he protected Britain’s £3.", + "length": 94 + }, + { + "text": "Under the agreement every other country with rebates had to agree to give up some of the money.", + "length": 95 + }, + { + "text": "The refund was granted because Britain get disproportionately less in farm subsidies than France.", + "length": 97 + }, + { + "text": "' Jobs: Mr Cameron is calling for rules to be relaxed to encourage firms to take on young people .", + "length": 98 + }, + { + "text": "Battle: The Prime Minister insisted it was 'essential' that the British rebate remained protected .", + "length": 99 + }, + { + "text": "2 per cent partly because of costs associated with an increase in officials taking early retirement.", + "length": 100 + }, + { + "text": "The Prime Minister fought off a raid by France on the UK’s annual clawback of funding from Brussels.", + "length": 102 + }, + { + "text": "After talks with European Council president Herman Van Rompuy, Mr Cameron insisted the deal still stands.", + "length": 105 + }, + { + 
"text": "A Downing Street source later said there were ‘assurances’ that Brussels will stick to February’s deal.", + "length": 109 + }, + { + "text": "The Prime Minister arrived at a summit in Brussels insisting he would block any attempt to reduce the rebate.", + "length": 109 + }, + { + "text": "But four months on, Francois Hollande's government launched a last-ditch attempt to slash Britain’s rebate.", + "length": 109 + }, + { + "text": "Mr Van Rompuy has declared improving the job prospects of young people the main theme for the two-day gathering.", + "length": 112 + }, + { + "text": "Mr Van Rompuy is emphasising the need for money to be pumped into projects that could boost their employment chances.", + "length": 117 + }, + { + "text": "surveillance operation, the existence of which was revealed by US National Security Agency whistle-blower Edward Snowden.", + "length": 121 + }, + { + "text": "6 million are out of work in the eurozone, and in Greece and Spain the youth unemployment rate has rocketed to over 50 per cent.", + "length": 128 + }, + { + "text": "Win: David Cameron, pictured arriving at an EU summit in Brussels, was confident of securing a deal to protect Britain's rebate .", + "length": 129 + }, + { + "text": "In a second victory for Mr Cameron, MEPs also backed down and agreed to the first ever EU budget reduction in its 56-year history.", + "length": 130 + }, + { + "text": "' A Downing Street source said afterwards: 'We have assurances from Mr Van Rompuy that we will stick to the deal done in February.", + "length": 130 + }, + { + "text": "Mr Cameron blocked the refund raid after talks with European Council President Herman Van Rompuy ahead of the EU summit in Brussels.", + "length": 132 + }, + { + "text": "But Mr Cameron is due to argue that loosening labour market rules and freeing up small and medium companies to take on young staff is more important.", + "length": 149 + }, + { + "text": "Paris said Britain’s refund should not be exempt from a 
deal to hand over £300million a year in taxpayer-funded farming subsidies for new member states.", + "length": 155 + }, + { + "text": "Martin Callanan MEP, the leader of the European Conservatives, said: 'This is an historic cut but we still have enormous amounts of fat that can be trimmed.", + "length": 156 + }, + { + "text": "' But despite reductions in some areas of spending such as regional aid, the cost of running the EU civil service, assembly and quangos will still rise by 1.", + "length": 157 + }, + { + "text": "The rebate was won for Britain by Baroness Thatcher in 1984 after her ‘handbagging’ of European leaders and her vow of ‘not a penny more’ for Brussels.", + "length": 159 + }, + { + "text": "Exemption from the farming subsidy deal was a key part of the UK’s rebate settlement, agreed at landmark talks in February, but it has come under increasing attack from France.", + "length": 178 + }, + { + "text": "Mr Cameron could also face questions from German chancellor Angela Merkel and others after it emerged that eavesdropping agency GCHQ has been mass-monitoring global phone and internet traffic.", + "length": 192 + }, + { + "text": "As he went into the meeting Mr Cameron said: 'It is absolutely essential that we stick to the deal we reached in February and that we protect the British rebate, and I will make sure that we do that.", + "length": 199 + }, + { + "text": "He has staked Britain's continuing membership of the EU on his ability to negotiate fundamental reform of it - with an in-out referendum due to be held in 2017 if the Tories win the general election.", + "length": 199 + }, + { + "text": "' Mr Cameron added: 'This council should be about doing in Brussels what we are doing in Britain, which is getting control of spending, making sure we live within our means, and then making ourselves more competitive, getting rid of regulations, making it easier for businesses to create jobs.", + "length": 293 + } + ], + "sentiment": { + "sentiment": 
"neutral", + "score": 0.4908274859189987 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:06.254318143Z", + "first_section_created": "2025-12-23T09:36:06.256498631Z", + "last_section_published": "2025-12-23T09:36:06.256858745Z", + "all_results_received": "2025-12-23T09:36:06.343032517Z", + "output_generated": "2025-12-23T09:36:06.343270726Z", + "total_processing_time_ms": 88, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 86, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:06.256498631Z", + "publish_time": "2025-12-23T09:36:06.25673394Z", + "first_worker_start": "2025-12-23T09:36:06.25722176Z", + "last_worker_end": "2025-12-23T09:36:06.342087Z", + "total_journey_time_ms": 85, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:06.257164558Z", + "start_time": "2025-12-23T09:36:06.25722176Z", + "end_time": "2025-12-23T09:36:06.257325664Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:06.257499Z", + "start_time": "2025-12-23T09:36:06.25766Z", + "end_time": "2025-12-23T09:36:06.342087Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 84 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:06.257180758Z", + "start_time": "2025-12-23T09:36:06.257248261Z", + "end_time": "2025-12-23T09:36:06.257360665Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:06.25723676Z", + "start_time": "2025-12-23T09:36:06.257347265Z", + "end_time": "2025-12-23T09:36:06.257412067Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:06.256801543Z", + "publish_time": "2025-12-23T09:36:06.256858745Z", + 
"first_worker_start": "2025-12-23T09:36:06.257260461Z", + "last_worker_end": "2025-12-23T09:36:06.317728Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:06.257307763Z", + "start_time": "2025-12-23T09:36:06.257331964Z", + "end_time": "2025-12-23T09:36:06.257337264Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:06.257748Z", + "start_time": "2025-12-23T09:36:06.257879Z", + "end_time": "2025-12-23T09:36:06.317728Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:06.257239361Z", + "start_time": "2025-12-23T09:36:06.257298363Z", + "end_time": "2025-12-23T09:36:06.257304863Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:06.25721646Z", + "start_time": "2025-12-23T09:36:06.257260461Z", + "end_time": "2025-12-23T09:36:06.257261761Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 143, + "min_processing_ms": 59, + "max_processing_ms": 84, + "avg_processing_ms": 71, + "median_processing_ms": 84, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + 
"wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2559, + "slowest_section_id": 0, + "slowest_section_time_ms": 85 + } +} diff --git a/data/output/006e02fe8bb2b880c9ac1c2547dcaca5bff57263.json b/data/output/006e02fe8bb2b880c9ac1c2547dcaca5bff57263.json new file mode 100644 index 0000000..d9b9e5e --- /dev/null +++ b/data/output/006e02fe8bb2b880c9ac1c2547dcaca5bff57263.json @@ -0,0 +1,198 @@ +{ + "file_name": "006e02fe8bb2b880c9ac1c2547dcaca5bff57263.txt", + "total_words": 206, + "top_n_words": [ + { + "word": "in", + "count": 13 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "of", + "count": 7 + }, + { + "word": "were", + "count": 6 + }, + { + "word": "killed", + "count": 5 + }, + { + "word": "the", + "count": 5 + }, + { + "word": "wounded", + "count": 5 + }, + { + "word": "a", + "count": 3 + }, + { + "word": "baghdad", + "count": 3 + }, + { + "word": "officials", + "count": 3 + } + ], + "sorted_sentences": [ + { + "text": "Mohammed Khamas, killing him instantly.", + "length": 39 + }, + { + "text": "Most of killed and wounded were civilians, officials said.", + "length": 58 + }, + { + "text": "Khamas was the deputy head of army intelligence department in Mosul.", + "length": 68 + }, + { + "text": "In northern Mosul, about 400 kilometers (249 miles) north of Baghdad, a bomb exploded in the convoy of army Gen.", + "length": 112 + }, + { + "text": "Officials said 29 people were killed and 107 wounded in 11 car bomb explosions in nine different parts of Baghdad.", + "length": 114 + }, + { + "text": "More than 800 Iraqis were killed and 2,030 wounded in violence and acts of terrorism in August, the United Nations said.", + "length": 120 + }, + { + "text": "(CNN) -- At least 35 people were killed and 
more than 100 wounded in shootings and explosions across the country on Tuesday, officials with Iraq's interior ministry told CNN.", + "length": 174 + }, + { + "text": "In Falluja, about 60 kilometers (37 miles) west of Baghdad, five people were killed and 12 others were wounded when gunmen attacked al-Tahadi police station in southern Falluja.", + "length": 177 + }, + { + "text": "Sunnis have felt politically marginalized under Shiite President Nuri al-Maliki, whose government fears it is being targeted by Sunni Islamists involved in fighting in neighboring Syria.", + "length": 186 + }, + { + "text": "Iraq has seen a sharp increase in friction between its Shiite and Sunni populations since April, when Iraqi security forces raided a site used by Sunni protesters to demonstrate against the Shiite-led government.", + "length": 212 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.658860981464386 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:06.757630019Z", + "first_section_created": "2025-12-23T09:36:06.757999134Z", + "last_section_published": "2025-12-23T09:36:06.758234844Z", + "all_results_received": "2025-12-23T09:36:06.820041534Z", + "output_generated": "2025-12-23T09:36:06.82019264Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:06.757999134Z", + "publish_time": "2025-12-23T09:36:06.758234844Z", + "first_worker_start": "2025-12-23T09:36:06.758750465Z", + "last_worker_end": "2025-12-23T09:36:06.819188Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:06.758731364Z", + "start_time": "2025-12-23T09:36:06.758781966Z", + "end_time": "2025-12-23T09:36:06.758806767Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + 
"worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:06.758936Z", + "start_time": "2025-12-23T09:36:06.759076Z", + "end_time": "2025-12-23T09:36:06.819188Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:06.758727364Z", + "start_time": "2025-12-23T09:36:06.758795066Z", + "end_time": "2025-12-23T09:36:06.758836768Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:06.758693862Z", + "start_time": "2025-12-23T09:36:06.758750465Z", + "end_time": "2025-12-23T09:36:06.758764165Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1269, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git 
a/data/output/006edc1803586c921370221519784fb850f25d63.json b/data/output/006edc1803586c921370221519784fb850f25d63.json new file mode 100644 index 0000000..77d5fb3 --- /dev/null +++ b/data/output/006edc1803586c921370221519784fb850f25d63.json @@ -0,0 +1,412 @@ +{ + "file_name": "006edc1803586c921370221519784fb850f25d63.txt", + "total_words": 1192, + "top_n_words": [ + { + "word": "the", + "count": 65 + }, + { + "word": "to", + "count": 42 + }, + { + "word": "and", + "count": 38 + }, + { + "word": "her", + "count": 31 + }, + { + "word": "she", + "count": 30 + }, + { + "word": "was", + "count": 27 + }, + { + "word": "that", + "count": 26 + }, + { + "word": "a", + "count": 23 + }, + { + "word": "i", + "count": 23 + }, + { + "word": "of", + "count": 20 + } + ], + "sorted_sentences": [ + { + "text": "'I .", + "length": 4 + }, + { + "text": "' Clare said.", + "length": 13 + }, + { + "text": "Saturday to arrive.", + "length": 19 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "'We waited eagerly for .", + "length": 24 + }, + { + "text": "weeks to buy her outfit.", + "length": 24 + }, + { + "text": "The 17-year-old, identified only as .", + "length": 37 + }, + { + "text": "'I strongly believe they did not know we were .", + "length": 47 + }, + { + "text": "together until the situation had already escalated.", + "length": 51 + }, + { + "text": "My dress was gorgeous, silver, and sparkly and I got .", + "length": 54 + }, + { + "text": "'I’m only 17, but I can see there’s something wrong about this ...", + "length": 70 + }, + { + "text": "it at Macy’s and was very excited to find it after searching over six .", + "length": 73 + }, + { + "text": "' He is the only one in the group of friends to have been issued a refund.", + "length": 74 + }, + { + "text": "stores for this dress,' she said, adding that she had saved over several .", + "length": 74 + }, + { + "text": "balcony above us and look down on us and single us out for our clothes or 
dancing.", + "length": 82 + }, + { + "text": "'[Is] Richmond Homeschool Prom adult enough to own up to their wrong actions as well.", + "length": 85 + }, + { + "text": "don’t feel race played a part in all that happened Saturday night,' Clare's boyfriend said.", + "length": 93 + }, + { + "text": "Dress code: Home school student Clare had made sure her dress met the required minimum length .", + "length": 95 + }, + { + "text": "' The teenager said she wants answers from the organizers about the way she feels she was treated.", + "length": 98 + }, + { + "text": "The family also denied that racism had been a factor in Clare and her boyfriend being asked to leave.", + "length": 101 + }, + { + "text": "Organizers for the Richmond Homeschool Prom have not yet responded to a Mail Online request for comment.", + "length": 104 + }, + { + "text": "'When I got into the ballroom I laughed, because I was surrounded by girls in much shorter dresses then me.", + "length": 107 + }, + { + "text": "It was at that point that one of the organizers approached Clare again and asked to speak to her in private.", + "length": 108 + }, + { + "text": "' The experience has left the teenager feeling frustrated and angry at being singled out for her appearance.", + "length": 108 + }, + { + "text": "Clare, is demanding a refund for her and her friends, who walked out of the prom with her in a show of unity.", + "length": 109 + }, + { + "text": "Big night: The prom for home school students was being held at the Shady Grove Methodist church in Virginia .", + "length": 109 + }, + { + "text": "Ruined: Virginia teenager Clare had been excited about her prom but organizers asked her to leave because of her dress .", + "length": 120 + }, + { + "text": "As she was being escorted, crying, from the venue, Clare says she asked the security guard what he made of the situation.", + "length": 121 + }, + { + "text": "Clare's date also tried to explain that because the group had arrived together, if she was 
made to leave they would all have to go.", + "length": 131 + }, + { + "text": "The teenager had been excited about the $25-a-ticket Twilight in Paris prom, which offered a welcome break as she studied for her finals.", + "length": 137 + }, + { + "text": "'We were also a little grossed out by all the dads on the balcony above the dance floor, ogling and talking amongst themselves,' she said.", + "length": 138 + }, + { + "text": "Support: Clare's prom date, pictured, and her friends, who drove to the dance together, are demanding a refund after having to leave early .", + "length": 140 + }, + { + "text": "Despite following the dress code on skirt length, and trying to reason with the organizers, the student says she was ordered to leave the dance.", + "length": 144 + }, + { + "text": "' Friends of the teenager told her later that as the evening went on, other students were dancing proactively, yet no one intervened to stop them.", + "length": 146 + }, + { + "text": "When the friends asked if they would be refunded for a dance they had spent less than half an hour at, they were told only Clare would be given a refund.", + "length": 153 + }, + { + "text": "But, as soon as she walking into the venue last weekend, one of the organizers stopped her at the door and said: 'Honey, that dress is too short,' she claimed.", + "length": 159 + }, + { + "text": "Her concerned friends tried to intervene in the conversation, and say they hadn't even been dancing, but they were told the situation 'was none of their business'.", + "length": 163 + }, + { + "text": "In a post she wrote on her sister's Wine and Marble blog about the incident, Clare said she showed the organizer that the dress did meet the regulation and she was let in.", + "length": 171 + }, + { + "text": "A short while later, as the teenager chatted to her friends and prom date while swaying to the music, she said she noticed a group of fathers staring at her from a balcony.", + "length": 172 + }, + { + "text": "please 
tell me I’m not the only one who think it doesn’t matter how people are dressed or how they move their bodies, we should still treat them with respect and decency.", + "length": 174 + }, + { + "text": "Clare is currently getting ready to sit her final examinations, but her sister Hannah said she was overwhelmed by the messages of support and the reaction to her experiences.", + "length": 174 + }, + { + "text": "'We walked out of the prom, frustrated and angry and feeling very disrespected and violated,' Clare said, admitting that in a moment of immaturity they had shouted profanities.", + "length": 176 + }, + { + "text": "Clare added that she felt singled out for her appearance, and is demanding that the Richmond Homeschool Prom organizers refund her friends who were forced to leave alongside her.", + "length": 178 + }, + { + "text": "'My group of five people had to leave the prom because I stuck out, I have long legs and I was wearing a sparkly dress, I didn’t look like most of the 13-15 year old girls there.", + "length": 180 + }, + { + "text": "The teenager said that despite trying to tell the woman that she and her friends hadn't even been dancing, she was told again that her dress was too short and she would have to leave.", + "length": 183 + }, + { + "text": "'I felt violated by the sheer number of male parents that were assigned to do nothing for five hours other then watch girls in short dresses and heels dance to upbeat music,' Clare added.", + "length": 187 + }, + { + "text": "After trawling round clothes stores she found the perfect outfit - a sparkly silver dress that met the regulation length of being longer than the fingertips when her arms were by her side.", + "length": 188 + }, + { + "text": "A Virginia teenager says she was forced to leave her prom after fathers who were acting as chaperones at the event complained that she was dancing provocatively and her skirt was too short.", + "length": 189 + }, + { + "text": "'I was told that the way I 
dressed and moved my body was causing men to think inappropriately about me, implying that it is my responsibility to control other people’s thoughts and drives,' she said.", + "length": 201 + }, + { + "text": "'[She] told me that some of the dads who were chaperoning had complained that my dancing was too provocative, and that I was going to cause the young men at the prom to think impure thoughts,' Clare claimed.", + "length": 207 + }, + { + "text": "And refund my group as they verbally promised to do, and issue an apology for kicking me out of my senior prom because their husbands felt as though my body was something they had a right to control,' she said.", + "length": 210 + }, + { + "text": "'I asked the security guy if my dress was compliant with the dress code and if he had noticed any inappropriateness in my behavior and he said he didn’t think I did anything to get kicked out but it wasn’t his call,' Clare wrote.", + "length": 233 + }, + { + "text": "'I was talked to disrespectfully, ganged up on and treated as less then a person by people in authority, and when I requested to have one of my peers present to validate later what was said in this \"meeting\" I was denied that right,' she said.", + "length": 243 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5881001651287079 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:07.25904452Z", + "first_section_created": "2025-12-23T09:36:07.259361932Z", + "last_section_published": "2025-12-23T09:36:07.259702346Z", + "all_results_received": "2025-12-23T09:36:07.363000908Z", + "output_generated": "2025-12-23T09:36:07.363247518Z", + "total_processing_time_ms": 104, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 103, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:07.259361932Z", + "publish_time": "2025-12-23T09:36:07.259584341Z", + "first_worker_start": 
"2025-12-23T09:36:07.260120763Z", + "last_worker_end": "2025-12-23T09:36:07.341003Z", + "total_journey_time_ms": 81, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:07.260134764Z", + "start_time": "2025-12-23T09:36:07.260194466Z", + "end_time": "2025-12-23T09:36:07.26029837Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:07.260366Z", + "start_time": "2025-12-23T09:36:07.260534Z", + "end_time": "2025-12-23T09:36:07.341003Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 80 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:07.260119263Z", + "start_time": "2025-12-23T09:36:07.260181365Z", + "end_time": "2025-12-23T09:36:07.260272969Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:07.26004796Z", + "start_time": "2025-12-23T09:36:07.260120763Z", + "end_time": "2025-12-23T09:36:07.260176465Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:07.259628943Z", + "publish_time": "2025-12-23T09:36:07.259702346Z", + "first_worker_start": "2025-12-23T09:36:07.260183065Z", + "last_worker_end": "2025-12-23T09:36:07.362106Z", + "total_journey_time_ms": 102, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:07.260146864Z", + "start_time": "2025-12-23T09:36:07.260194066Z", + "end_time": "2025-12-23T09:36:07.260229067Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:07.260962Z", + "start_time": "2025-12-23T09:36:07.261085Z", + "end_time": "2025-12-23T09:36:07.362106Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 101 + }, + "topn": { + "worker_type": "topn", + 
"receive_time": "2025-12-23T09:36:07.260157464Z", + "start_time": "2025-12-23T09:36:07.260191266Z", + "end_time": "2025-12-23T09:36:07.260237768Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:07.260158764Z", + "start_time": "2025-12-23T09:36:07.260183065Z", + "end_time": "2025-12-23T09:36:07.260197866Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 181, + "min_processing_ms": 80, + "max_processing_ms": 101, + "avg_processing_ms": 90, + "median_processing_ms": 101, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3261, + "slowest_section_id": 1, + "slowest_section_time_ms": 102 + } +} diff --git a/data/output/006f05c3183eacea289414270c02872627f4a0b4.json b/data/output/006f05c3183eacea289414270c02872627f4a0b4.json new file mode 100644 index 0000000..473f582 --- /dev/null +++ b/data/output/006f05c3183eacea289414270c02872627f4a0b4.json @@ -0,0 +1,246 @@ +{ + "file_name": 
"006f05c3183eacea289414270c02872627f4a0b4.txt", + "total_words": 466, + "top_n_words": [ + { + "word": "the", + "count": 52 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "said", + "count": 13 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "to", + "count": 11 + }, + { + "word": "family", + "count": 7 + }, + { + "word": "he", + "count": 7 + }, + { + "word": "in", + "count": 7 + }, + { + "word": "plane", + "count": 7 + }, + { + "word": "a", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "\"Considering the circumstances, Ruben is doing fine.", + "length": 52 + }, + { + "text": "\"We hope that all the media will respect our privacy.", + "length": 53 + }, + { + "text": "All but one of the 104 passengers on board were killed.", + "length": 55 + }, + { + "text": "\"He has drunk a little, and has seen the flowers and cuddly toys.", + "length": 65 + }, + { + "text": "\"The coming period will be very difficult for us,\" the family said.", + "length": 67 + }, + { + "text": "The family also thanked \"the vast majority of the Dutch media for respecting our privacy.", + "length": 89 + }, + { + "text": "\" More than two-thirds of the passengers killed in the plane crash were Dutch, the ministry said.", + "length": 97 + }, + { + "text": "\" The family, which will raise Ruben, said the boy knows that his parents and brother were killed.", + "length": 98 + }, + { + "text": "He is sleeping a lot; now and then he wakes up and is then lucid,\" the family said in the statement.", + "length": 100 + }, + { + "text": "Both of Ruben's parents and a brother were killed in the crash, a Dutch foreign ministry representative said.", + "length": 109 + }, + { + "text": "An investigation into the crash is under way, and authorities are reviewing the aircraft's flight data recorder.", + "length": 112 + }, + { + "text": "Meanwhile Ruben van Assouw, the 9-year-old sole survivor of the plane crash, has returned home to the Netherlands.", + "length": 114 + }, + { + 
"text": "He realized he was in trouble and tried to pull the plane up and turn the auto-pilot back on to give it another try, the sources said.", + "length": 134 + }, + { + "text": "The Dutch foreign ministry said Friday it is sending more experts to Tripoli to help Libyan authorities and Dutch colleagues identify the victims.", + "length": 146 + }, + { + "text": "The sources said that as the pilot approached Tripoli International Airport, he took the plane off auto-pilot hoping to manually land the aircraft.", + "length": 147 + }, + { + "text": "He suffered multiple fractures to his legs and underwent surgery at Al Khadra Hospital, said a doctor at the hospital who declined to give her name.", + "length": 148 + }, + { + "text": "The plane, an Airbus A330-200, originated in Johannesburg, South Africa, and was at the end of its nearly nine-hour flight when it crashed Wednesday.", + "length": 149 + }, + { + "text": "Passengers from Libya, South Africa, Belgium, Austria, Germany, France, Zimbabwe and Britain were also among the victims, the airline said on its Web site.", + "length": 155 + }, + { + "text": "\" Ruben's family said it has to deal with \"two kinds of grief\" -- the sorrow the boy is enduring and the sadness over the loss of the other family members.", + "length": 155 + }, + { + "text": "But the effort was too late and the plane slammed violently into the ground, explaining the condition of the wreckage and damage to the plane's tail at the crash site, the sources said.", + "length": 185 + }, + { + "text": "Ruben's family issued a statement Friday expressing gratitude to people who have helped them, such as Libyan hospital professionals and Dutch envoys, as well as the outpourings of sympathy from citizens in both countries.", + "length": 221 + }, + { + "text": "(CNN) -- Low visibility caused by mist and sand created poor flying conditions for the pilot of an Afriqiyah Airways flight that crashed just before landing in Tripoli, Libyan sources with 
knowledge of the investigation said Saturday.", + "length": 235 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8443239331245422 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:07.76047322Z", + "first_section_created": "2025-12-23T09:36:07.760833135Z", + "last_section_published": "2025-12-23T09:36:07.761028143Z", + "all_results_received": "2025-12-23T09:36:07.820626144Z", + "output_generated": "2025-12-23T09:36:07.82078735Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:07.760833135Z", + "publish_time": "2025-12-23T09:36:07.761028143Z", + "first_worker_start": "2025-12-23T09:36:07.761653668Z", + "last_worker_end": "2025-12-23T09:36:07.819693Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:07.761622567Z", + "start_time": "2025-12-23T09:36:07.761719671Z", + "end_time": "2025-12-23T09:36:07.761778973Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:07.761859Z", + "start_time": "2025-12-23T09:36:07.762014Z", + "end_time": "2025-12-23T09:36:07.819693Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:07.761636667Z", + "start_time": "2025-12-23T09:36:07.761683969Z", + "end_time": "2025-12-23T09:36:07.761754172Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:07.761577165Z", + "start_time": "2025-12-23T09:36:07.761653668Z", + "end_time": "2025-12-23T09:36:07.76170607Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": 
{ + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2704, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/006f36220493c5a5d9c586c9c544091a6a2c9a0e.json b/data/output/006f36220493c5a5d9c586c9c544091a6a2c9a0e.json new file mode 100644 index 0000000..c470932 --- /dev/null +++ b/data/output/006f36220493c5a5d9c586c9c544091a6a2c9a0e.json @@ -0,0 +1,254 @@ +{ + "file_name": "006f36220493c5a5d9c586c9c544091a6a2c9a0e.txt", + "total_words": 399, + "top_n_words": [ + { + "word": "the", + "count": 26 + }, + { + "word": "to", + "count": 19 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "i", + "count": 12 + }, + { + "word": "schleck", + "count": 10 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "tour", + "count": 8 + }, + { + "word": "is", + "count": 7 + }, + { + "word": "was", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "'I'm hugely 
disappointed.", + "length": 25 + }, + { + "text": "This is a huge blow for me.", + "length": 27 + }, + { + "text": "'My knee is too damaged from the crash.", + "length": 39 + }, + { + "text": "I was ready to help him defend his GC ambitions.", + "length": 48 + }, + { + "text": "'I believed until that moment that I would start.", + "length": 49 + }, + { + "text": "'I'm sad to let the team down, to let Frank down.", + "length": 49 + }, + { + "text": "I felt I was progressing, everything was coming back.", + "length": 53 + }, + { + "text": "5-kilometre fourth stage from Le Touquet-Paris-Plage to Lille.", + "length": 62 + }, + { + "text": "I was so happy to be here, racing with Frank in the Tour again.", + "length": 63 + }, + { + "text": "I think I ignored the pain somehow, hiding it in the back of my head.", + "length": 69 + }, + { + "text": "Schleck (Trek Factory Racing) announced he would not be starting the 163.", + "length": 73 + }, + { + "text": "He wrote on Twitter: 'Very disappointed to let you know that I will not be able to start.", + "length": 89 + }, + { + "text": "Bad moment: Luxembourg's Andy Schleck has been ruled out of the Tour with a ligament injury .", + "length": 93 + }, + { + "text": "Down and out: Schleck suffered a heavy fall a Le Tour headed through Epping Forest on Monday .", + "length": 94 + }, + { + "text": "Trek said Schleck would travel to Basle, Switzerland, for further examination and possible surgery.", + "length": 99 + }, + { + "text": "Collision: A pedestrian taking photos was knocked over during stage 3 of the Tour de France on Monday .", + "length": 103 + }, + { + "text": "Moment of impact: Luxembourg's Andy Schleck collided with the pedestrian but managed to get to his feet .", + "length": 105 + }, + { + "text": "Raring to go: Schleck in a training session prior to the start of the Tour which he is unable to complete .", + "length": 107 + }, + { + "text": "The 29-year-old managed to get back to his feet and finished the race but 
will now take no further part in the Tour.", + "length": 116 + }, + { + "text": "He was riding the Tour in support of his brother Frank, who is already more than a minute behind race leader Vincenzo Nibali.", + "length": 125 + }, + { + "text": "' The crash occurred when, spotting a pedestrian stood virtually in the road, racers were forced to break suddenly and Schleck fell from his bike.", + "length": 146 + }, + { + "text": "Andy Schleck, the 2010 winner, is out of the Tour de France following a crash involving a spectator on Monday's third stage from Cambridge to London.", + "length": 149 + }, + { + "text": "Schleck added in a team statement: 'I went on the rollers as soon as we arrived in Le Touquet, to get the muscles and tendons warmed up, but the pain is too much.", + "length": 162 + }, + { + "text": "' Schleck is a shadow of the figure who duelled with Alberto Contador for the 2010 yellow jersey and inherited the title after the Spaniard's win was quashed due to a doping violation.", + "length": 184 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8975030183792114 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:08.261198993Z", + "first_section_created": "2025-12-23T09:36:08.262840259Z", + "last_section_published": "2025-12-23T09:36:08.262981365Z", + "all_results_received": "2025-12-23T09:36:08.328813117Z", + "output_generated": "2025-12-23T09:36:08.328990724Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:08.262840259Z", + "publish_time": "2025-12-23T09:36:08.262981365Z", + "first_worker_start": "2025-12-23T09:36:08.263552588Z", + "last_worker_end": "2025-12-23T09:36:08.327869Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": 
"2025-12-23T09:36:08.263562288Z", + "start_time": "2025-12-23T09:36:08.263633091Z", + "end_time": "2025-12-23T09:36:08.263689793Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:08.263746Z", + "start_time": "2025-12-23T09:36:08.263874Z", + "end_time": "2025-12-23T09:36:08.327869Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:08.263494385Z", + "start_time": "2025-12-23T09:36:08.263557488Z", + "end_time": "2025-12-23T09:36:08.26361609Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:08.263495285Z", + "start_time": "2025-12-23T09:36:08.263552588Z", + "end_time": "2025-12-23T09:36:08.263577389Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2149, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/006f475a6f4c972d20c491433156a6c28cfacd99.json b/data/output/006f475a6f4c972d20c491433156a6c28cfacd99.json new file mode 100644 index 0000000..13bcfde --- /dev/null +++ b/data/output/006f475a6f4c972d20c491433156a6c28cfacd99.json @@ -0,0 +1,322 @@ +{ + "file_name": "006f475a6f4c972d20c491433156a6c28cfacd99.txt", + "total_words": 667, + "top_n_words": [ + { + "word": "the", + "count": 26 + }, + { + "word": "to", + "count": 19 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "she", + "count": 16 + }, + { + "word": "that", + "count": 13 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "i", + "count": 12 + }, + { + "word": "her", + "count": 11 + }, + { + "word": "minard", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "\"...", + "length": 4 + }, + { + "text": "Yes, she concedes.", + "length": 18 + }, + { + "text": "Then there was romance.", + "length": 23 + }, + { + "text": "After that there was marriage.", + "length": 30 + }, + { + "text": "(CNN) -- First there was friendship.", + "length": 36 + }, + { + "text": "\"And now I have a strong faith in God.", + "length": 38 + }, + { + "text": "But she still thinks her expulsion is unfair.", + "length": 45 + }, + { + "text": "\" What you need to know about same-sex marriage .", + "length": 49 + }, + { + "text": "\" Did Minard sign the school's morality covenant?", + "length": 49 + }, + { + "text": "\" Love wins in gay couple's 40-year immigration fight .", + "length": 55 + }, + { + "text": "\" Meanwhile, her life was changing in other ways as well.", + "length": 57 + }, + { + "text": "Minard thinks she is singled out because she's a lesbian.", + "length": 57 + }, + { + "text": "She had planned to become a strength and conditioning coach.", + "length": 60 + }, + { + "text": "About 
3½ years ago, she met her future spouse, Kadyn Park.", + "length": 61 + }, + { + "text": "They started out as friends, and romance blossomed over time.", + "length": 61 + }, + { + "text": "\"I had questions, but I worked through those questions,\" she said.", + "length": 66 + }, + { + "text": "Utah same-sex marriages already done are valid, appeals court rules .", + "length": 69 + }, + { + "text": "\"We eventually fell in love and decided to get married,\" Minard said.", + "length": 69 + }, + { + "text": "And I believe you can still have faith in God and live a gay lifestyle.", + "length": 71 + }, + { + "text": "struggled\" with the idea that her faith was at odds with her sexuality.", + "length": 71 + }, + { + "text": "\" Sjoberg added, \"We therefore cannot comment on your specific request.", + "length": 71 + }, + { + "text": "\" Her educational and professional path, though, is now far from clear.", + "length": 71 + }, + { + "text": "Having grown up in the Lutheran Church, Minard notes that she \"at first ...", + "length": 75 + }, + { + "text": "\"Due to this recent event, you will not be able to attend SCU in the future.", + "length": 76 + }, + { + "text": "\"Once I graduated,\" Minard said, \"I was willing to go wherever life took me.", + "length": 76 + }, + { + "text": "Yet Minard feels that she's in the right place in her own spiritual journey.", + "length": 76 + }, + { + "text": "\"As an American and a Christian, I do respect your choice,\" the administrator wrote.", + "length": 84 + }, + { + "text": "\"(But) I have to uphold the Lifestyle Covenant at SCU and confront you with our position.", + "length": 89 + }, + { + "text": "The couple wed March 17 in Albuquerque, New Mexico -- a state where same-sex marriage is legal.", + "length": 95 + }, + { + "text": "Plenty of other students violate the contract in one way or another without being expelled, she says.", + "length": 101 + }, + { + "text": "In addition to the emotional sting, the 22-year-old says she is 
now stuck personally -- not knowing what to do, or where to go next.", + "length": 132 + }, + { + "text": "It's going to be hard to get into classes that may be full, because they gave me very little notice before the fall semester starts.", + "length": 132 + }, + { + "text": "Such a union is in apparent conflict with the \"lifestyle covenant\" of the university \"that all students must agree and sign,\" he added.", + "length": 135 + }, + { + "text": "\"I stayed on without a scholarship,\" Minard said, \"because I was so invested in the university and knew that some credits wouldn't transfer to other schools.", + "length": 157 + }, + { + "text": "\"I'm trying to figure out how and where I can graduate,\" she told CNN, noting that she'll have to start paying off her loans at the end of next year unless things change.", + "length": 170 + }, + { + "text": "The latter value includes \"building a Christ-centered community,\" \"honoring our Pentecostal Holiness heritage\" and \"respecting diversity and various Christian backgrounds.", + "length": 171 + }, + { + "text": "And now, at what would have been a few weeks from her senior year in college, Christian Minard finds herself expelled from school -- because the person she married is another woman.", + "length": 181 + }, + { + "text": "In a letter from earlier this month that Minard shared with CNN, an administrator at Southwestern Christian University noted that he'd been told of Minard's same-sex marriage and saw pictures of it posted to Facebook.", + "length": 217 + }, + { + "text": "\" Minard came to the school on scholarship for basketball, though her playing career was cut short after doctors told her -- after she'd suffered multiple concussions -- that she should avoid sports with physical contact.", + "length": 221 + }, + { + "text": "Believes 'gay lifestyle' compatible with 'faith in God' Located in the metropolitan Oklahoma City community of Bethany, Southwestern Christian University's website states the 
school's three core values are scholarship, service and spirit.", + "length": 238 + }, + { + "text": "\" When asked to confirm that the school -- which describes itself as part of the \"International Pentecostal Holiness\" denomination -- sent the letter and to elaborate on the decision, the school's provost, Connie Sjoberg, said only that federal law \"prohibits us from confirming if an individual is or has been a student at our institution.", + "length": 340 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5437179803848267 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:08.763731938Z", + "first_section_created": "2025-12-23T09:36:08.765483308Z", + "last_section_published": "2025-12-23T09:36:08.765752419Z", + "all_results_received": "2025-12-23T09:36:08.824048368Z", + "output_generated": "2025-12-23T09:36:08.824250376Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:08.765483308Z", + "publish_time": "2025-12-23T09:36:08.765752419Z", + "first_worker_start": "2025-12-23T09:36:08.766209038Z", + "last_worker_end": "2025-12-23T09:36:08.823114Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:08.766238539Z", + "start_time": "2025-12-23T09:36:08.766320442Z", + "end_time": "2025-12-23T09:36:08.766409146Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:08.766408Z", + "start_time": "2025-12-23T09:36:08.766556Z", + "end_time": "2025-12-23T09:36:08.823114Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:08.766194837Z", + "start_time": "2025-12-23T09:36:08.76626024Z", + 
"end_time": "2025-12-23T09:36:08.766346443Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:08.766144435Z", + "start_time": "2025-12-23T09:36:08.766209038Z", + "end_time": "2025-12-23T09:36:08.766238539Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3929, + "slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/006f6b4b073292295b28ef6e1a86d7a335496a5c.json b/data/output/006f6b4b073292295b28ef6e1a86d7a335496a5c.json new file mode 100644 index 0000000..e2d0af5 --- /dev/null +++ b/data/output/006f6b4b073292295b28ef6e1a86d7a335496a5c.json @@ -0,0 +1,214 @@ +{ + "file_name": "006f6b4b073292295b28ef6e1a86d7a335496a5c.txt", + "total_words": 254, + "top_n_words": [ + { + "word": "the", + "count": 15 + }, + { + 
"word": "a", + "count": 10 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "her", + "count": 5 + }, + { + "word": "of", + "count": 5 + }, + { + "word": "on", + "count": 5 + }, + { + "word": "s", + "count": 5 + }, + { + "word": "to", + "count": 5 + }, + { + "word": "is", + "count": 4 + }, + { + "word": "she", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "\" Mel B.", + "length": 8 + }, + { + "text": "to fill the empty seat.", + "length": 23 + }, + { + "text": "Someone who has a very strong point of view.", + "length": 44 + }, + { + "text": "We needed somebody who was qualified for the job.", + "length": 49 + }, + { + "text": "She's an amazing singer, dancer and a huge personality.", + "length": 55 + }, + { + "text": "Brown, aka Scary Spice, is a veteran of reality competitions.", + "length": 61 + }, + { + "text": "\"To know her is to know a very frank, strong, enduring entertainer.", + "length": 67 + }, + { + "text": "(CNN) -- With Sharon Osbourne gone, \"America's Got Talent\" has tapped Spice Girl Mel B.", + "length": 87 + }, + { + "text": "Osbourne announced she was leaving \"AGT\" last year following claims that NBC discriminated against her son, Jack.", + "length": 113 + }, + { + "text": "The contestants will get a lot of constructive feedback from her and I can't wait to see her chemistry with Howard and Howie.", + "length": 125 + }, + { + "text": "might not be the only new face on the series this summer -- EW adds that the show is considering hiring a fourth judge as well.", + "length": 127 + }, + { + "text": "She competed on \"Dancing With the Stars\" in the United States, served as a judge on the Australian version of \"The X Factor\" and was a guest judge on the UK's \"X Factor\" and \"Britain's Next Top Model.", + "length": 200 + }, + { + "text": "Entertainment Weekly reports that the singer and TV personality, whose full name is Melanie Brown, will join fellow judges Howard Stern and Howie Mandel on the NBC reality competition 
when it returns this summer.", + "length": 212 + }, + { + "text": "\" \"I've known Melanie since she did 'Dancing with the Stars,' and I've known her as a performer in The Spice Girls before that,\" Paul Telegdy, NBC's president of alternative and late night programming, said in a statement to EW.", + "length": 228 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5033206939697266 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:09.266539394Z", + "first_section_created": "2025-12-23T09:36:09.266847507Z", + "last_section_published": "2025-12-23T09:36:09.267044515Z", + "all_results_received": "2025-12-23T09:36:09.3287371Z", + "output_generated": "2025-12-23T09:36:09.328858505Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:09.266847507Z", + "publish_time": "2025-12-23T09:36:09.267044515Z", + "first_worker_start": "2025-12-23T09:36:09.267474432Z", + "last_worker_end": "2025-12-23T09:36:09.327844Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:09.267530634Z", + "start_time": "2025-12-23T09:36:09.267583236Z", + "end_time": "2025-12-23T09:36:09.267610437Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:09.267754Z", + "start_time": "2025-12-23T09:36:09.267904Z", + "end_time": "2025-12-23T09:36:09.327844Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:09.267508233Z", + "start_time": "2025-12-23T09:36:09.267570036Z", + "end_time": "2025-12-23T09:36:09.267608837Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + 
"receive_time": "2025-12-23T09:36:09.26741793Z", + "start_time": "2025-12-23T09:36:09.267474432Z", + "end_time": "2025-12-23T09:36:09.267485932Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1410, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/006f6c5843c79d2fce7c6e1d57636eeb8eb2cccb.json b/data/output/006f6c5843c79d2fce7c6e1d57636eeb8eb2cccb.json new file mode 100644 index 0000000..c627b72 --- /dev/null +++ b/data/output/006f6c5843c79d2fce7c6e1d57636eeb8eb2cccb.json @@ -0,0 +1,278 @@ +{ + "file_name": "006f6c5843c79d2fce7c6e1d57636eeb8eb2cccb.txt", + "total_words": 543, + "top_n_words": [ + { + "word": "the", + "count": 25 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "david", + "count": 11 + }, + { + 
"word": "in", + "count": 11 + }, + { + "word": "jones", + "count": 11 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "woolworths", + "count": 7 + }, + { + "word": "by", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "and Aap .", + "length": 9 + }, + { + "text": "Daniel Mills .", + "length": 14 + }, + { + "text": "2 billion takeover.", + "length": 19 + }, + { + "text": "50 above where it was a year ago.", + "length": 33 + }, + { + "text": "'It's tragic that it's come to this.", + "length": 36 + }, + { + "text": "He appeared to abstain from Monday's vote.", + "length": 42 + }, + { + "text": "Mr Lobb has a long history with David Jones.", + "length": 44 + }, + { + "text": "But the biggest winner is retail mogul Solomon Lew, who bought a 9.", + "length": 67 + }, + { + "text": "ASIC could raise objections at a Federal Court hearing on Thursday.", + "length": 67 + }, + { + "text": "At least we can get $4 per share and get out,' he told the meeting.", + "length": 67 + }, + { + "text": "2 billion takeover of the company by South African based Woolworths .", + "length": 69 + }, + { + "text": "He says David Jones had been damaged by poor decisions by management and the board.", + "length": 83 + }, + { + "text": "'I think they have potentially been far too generous to Mr Lew than they needed to be,' he said.", + "length": 96 + }, + { + "text": "Some shareholders have accused David Jones management of giving up on the Iconic Australian company .", + "length": 101 + }, + { + "text": "David Jones Chairman Gordon Cairns left, and CEO Paul Zahra following a shareholder meeting for the $2.", + "length": 103 + }, + { + "text": "His mother worked there in the early 1900s and he's owned shares since it was publicly floated in the mid-1990s.", + "length": 112 + }, + { + "text": "Woolworths plans to overhaul David Jones by expanding its private label brands, improving in-store service and website.", + "length": 119 + }, + { 
+ "text": "Australian Shareholders Association spokesman Stephen Mayne said Mr Lew would not have had enough votes to block the takeover.", + "length": 126 + }, + { + "text": "The $4 offer was a solid outcome for shareholders, being more than 80 cents higher from where David Jones was trading prior to the bid and $1.", + "length": 142 + }, + { + "text": "More than 176 years after its first store opened in Sydney, David Jones is passing into foreign hands after shareholders overwhelmingly backed a $2.", + "length": 148 + }, + { + "text": "Woolworths has agreed to do that, offering more than $200 million for his Country Road shares on the condition he didn't block the David Jones takeover.", + "length": 152 + }, + { + "text": "South African retailer Woolworths Holdings will take control of Australia's oldest department store - which has been trading for more than 176 years, in August .", + "length": 162 + }, + { + "text": "A handful of shareholders spoke out against the sale at a meeting in Sydney on Monday, with some accusing the David Jones board of 'giving up' on the Australian icon.", + "length": 166 + }, + { + "text": "But most backed the move, with almost 97 per cent votes cast in favour of accepting the $4-a-share offer, though some, like retiree Reg Lobb, did so with heavy hearts.", + "length": 167 + }, + { + "text": "9 per cent stake in the retailer after the takeover was announced, apparently to use it as leverage in a battle with Woolworths Holdings over fashion chain Country Road.", + "length": 169 + }, + { + "text": "'We look forward to implementing our plans to reinvigorate David Jones and enhance the shopping experience for its customers,' chief executive Ian Moir said in a statement.", + "length": 172 + }, + { + "text": "The Australian Securities and Investments Commission (ASIC) has expressed concern over the offer to Mr Lew and whether it constitutes a benefit not available to other David Jones shareholders.", + "length": 192 + }, + { + "text": 
"South African retailer Woolworths Holdings will take control of Australia's oldest department store in August, pending court approval, and hopes to significantly improve its performance after years of sliding sales and profits.", + "length": 227 + }, + { + "text": "Almost all Country Road shares are held by either Woolworths, which owns nearly 88 per cent, or Mr Lew, who owns just under 12 per cent, and the billionaire has reportedly been agitating the South African company to buy him out for years.", + "length": 238 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.48354700207710266 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:09.767811289Z", + "first_section_created": "2025-12-23T09:36:09.768175703Z", + "last_section_published": "2025-12-23T09:36:09.768370911Z", + "all_results_received": "2025-12-23T09:36:09.829024555Z", + "output_generated": "2025-12-23T09:36:09.829213062Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:09.768175703Z", + "publish_time": "2025-12-23T09:36:09.768370911Z", + "first_worker_start": "2025-12-23T09:36:09.768908333Z", + "last_worker_end": "2025-12-23T09:36:09.828094Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:09.768941734Z", + "start_time": "2025-12-23T09:36:09.769015737Z", + "end_time": "2025-12-23T09:36:09.76908794Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:09.769028Z", + "start_time": "2025-12-23T09:36:09.769168Z", + "end_time": "2025-12-23T09:36:09.828094Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:09.76884403Z", 
+ "start_time": "2025-12-23T09:36:09.768908333Z", + "end_time": "2025-12-23T09:36:09.768978936Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:09.768922433Z", + "start_time": "2025-12-23T09:36:09.768971535Z", + "end_time": "2025-12-23T09:36:09.768994636Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3171, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git a/data/output/006f6ccf37a68ba56ef78009fc5e9e0ec454338a.json b/data/output/006f6ccf37a68ba56ef78009fc5e9e0ec454338a.json new file mode 100644 index 0000000..29e9111 --- /dev/null +++ b/data/output/006f6ccf37a68ba56ef78009fc5e9e0ec454338a.json @@ -0,0 +1,210 @@ +{ + "file_name": "006f6ccf37a68ba56ef78009fc5e9e0ec454338a.txt", + "total_words": 363, + "top_n_words": [ 
+ { + "word": "the", + "count": 26 + }, + { + "word": "hemp", + "count": 18 + }, + { + "word": "seed", + "count": 10 + }, + { + "word": "is", + "count": 9 + }, + { + "word": "yogurt", + "count": 9 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "air", + "count": 7 + }, + { + "word": "force", + "count": 7 + }, + { + "word": "oil", + "count": 6 + }, + { + "word": "per", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "The war on drugs has a new enemy, Greek yogurt.", + "length": 47 + }, + { + "text": "The Air Force banned hemp-derived products in 1999.", + "length": 51 + }, + { + "text": "’ Based on those numbers, it is unlikely the yogurt would cause a failed test.", + "length": 80 + }, + { + "text": "Banned: This yogurt has been banned by the US Air Force for containing hemp seeds .", + "length": 83 + }, + { + "text": "‘The ingestion of products containing or products derived from hemp seed or hemp seed oil is prohibited.", + "length": 106 + }, + { + "text": "Blueberry is the only Chobani flavour containing hemp seed, which a company spokesperson told the Air Force Times has about 10 grams worth.", + "length": 139 + }, + { + "text": "’ With the Air Force threshold for a failing test coming in at 50 part of THC per billion per 100 grams, according to the Times, the yogurt should be safe.", + "length": 157 + }, + { + "text": "Hemp and hemp seed oil products have been banned because they can lead to violations of the military branch’s drug testing program, according to regulations.", + "length": 159 + }, + { + "text": "The US Air Force has banned blueberry-flavored Chobani yogurt because it contains hemp seeds – which themselves contain THC, the active ingredient in marijuana – which could lead to positive results during drug testing.", + "length": 223 + }, + { + "text": "’ The yogurt comp[any spokesperson defended their product, telling the Times that ‘the THC level of the hemp seeds is less than 10 parts per million per 100 grams of hemp seeds, 
so the maximum amount of THC in the yogurt would be 1 part per million.", + "length": 253 + }, + { + "text": "‘Studies have shown that products made with hemp seed and hemp seed oil may contain varying levels of tetrahydrocannabinol (THC), an active ingredient of marijuana which is detectable under the Air Force Drug Testing Program,’ states a regulation quoted by the Times.", + "length": 271 + }, + { + "text": "‘Based upon the research we have conducted from the published literature available, the findings suggest that hemp food consumption is not likely to meet this threshold if THC levels in hemp oil and hulled seeds are maintained below 5 and 2 parts per million respectively.", + "length": 274 + }, + { + "text": "‘The Air Force has not restricted military members from consuming Chobani Greek yogurt,’ Captain Adam Koudelka told the Times, adding that ‘only Chobani yogurt that contains hemp seed or hemp seed oil is prohibited, just as any product which contains or is derived from hemp seed or hemp seed oil is prohibited.", + "length": 317 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.742999255657196 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:10.269115184Z", + "first_section_created": "2025-12-23T09:36:10.269448098Z", + "last_section_published": "2025-12-23T09:36:10.269594204Z", + "all_results_received": "2025-12-23T09:36:10.33479323Z", + "output_generated": "2025-12-23T09:36:10.334925536Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:10.269448098Z", + "publish_time": "2025-12-23T09:36:10.269594204Z", + "first_worker_start": "2025-12-23T09:36:10.270138726Z", + "last_worker_end": "2025-12-23T09:36:10.333854Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + 
"receive_time": "2025-12-23T09:36:10.270090424Z", + "start_time": "2025-12-23T09:36:10.270147226Z", + "end_time": "2025-12-23T09:36:10.270174427Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:10.270332Z", + "start_time": "2025-12-23T09:36:10.270453Z", + "end_time": "2025-12-23T09:36:10.333854Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:10.270120225Z", + "start_time": "2025-12-23T09:36:10.270180427Z", + "end_time": "2025-12-23T09:36:10.27024213Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:10.270070723Z", + "start_time": "2025-12-23T09:36:10.270138726Z", + "end_time": "2025-12-23T09:36:10.270155626Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2169, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/006f72f0ad668d923074eac5fdd5a11d5931ca95.json b/data/output/006f72f0ad668d923074eac5fdd5a11d5931ca95.json new file mode 100644 index 0000000..95c8174 --- /dev/null +++ b/data/output/006f72f0ad668d923074eac5fdd5a11d5931ca95.json @@ -0,0 +1,400 @@ +{ + "file_name": "006f72f0ad668d923074eac5fdd5a11d5931ca95.txt", + "total_words": 906, + "top_n_words": [ + { + "word": "the", + "count": 48 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "in", + "count": 23 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "public", + "count": 19 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "school", + "count": 16 + }, + { + "word": "money", + "count": 13 + }, + { + "word": "students", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "C.", + "length": 2 + }, + { + "text": "2.", + "length": 2 + }, + { + "text": "1.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "and Canada.", + "length": 11 + }, + { + "text": "Washington, D.", + "length": 14 + }, + { + "text": "Then you do the math.", + "length": 21 + }, + { + "text": "The neighbors hated it.", + "length": 23 + }, + { + "text": "See details about K-12 schools, teachers » .", + "length": 45 + }, + { + "text": "After all, it's your money they want to take.", + "length": 45 + }, + { + "text": "Chavis has also worked as a real estate investor.", + "length": 49 + }, + { + "text": "If you act like a fool, you'll be treated like a fool.", + "length": 54 + }, + { + "text": "The school was in many ways a failure, a joke, a sham.", + "length": 54 + }, + { + "text": "The opinions expressed in this commentary are solely those of Ben Chavis.", + "length": 73 + }, + { + "text": "Have you ever met a public school administrator who said they 
have enough money?", + "length": 80 + }, + { + "text": "When I took over as principal in 2000, it was the worst middle school in Oakland.", + "length": 81 + }, + { + "text": "But does the data support the argument that our schools need more money to succeed?", + "length": 83 + }, + { + "text": "The American public has been conned into believing that public schools need more money.", + "length": 87 + }, + { + "text": "Before I became its principal, people called American Indian Public Charter School the zoo.", + "length": 91 + }, + { + "text": "The United States spends more money on public education than any other country in the world.", + "length": 92 + }, + { + "text": "AIPCS students spend three to four hours a day working on mathematics and English-language arts.", + "length": 96 + }, + { + "text": "Educator Ben Chavis says money isn't enough to improve schools run by incompetent administrators.", + "length": 97 + }, + { + "text": "The charter school is now one of the top-scoring schools in the state and is nationally recognized.", + "length": 99 + }, + { + "text": "In 2009, they excelled in academics, physical fitness and any standardized test that they were given.", + "length": 101 + }, + { + "text": "I told the board I would take the job only if they let me go my own way and do what I thought was best.", + "length": 103 + }, + { + "text": "We need proven leaders who can prepare our children to be competitive members in a free-market society.", + "length": 103 + }, + { + "text": "They couldn't stand the behavior of the students, who, with little supervision or control, wreaked havoc in the area.", + "length": 117 + }, + { + "text": "accountability be attached to the stimulus money that is being awarded to all institutions, including public schools.", + "length": 117 + }, + { + "text": "I believe all the money in the world would not be enough to improve schools run by incompetent public school administrators.", + "length": 124 + }, + { + "text": "What have we, 
the public taxpayers, received for our exceptionally generous financial support of the Oakland public schools?", + "length": 124 + }, + { + "text": "According to the California Department of Education, the district's reported 2008 California Standardized Test scores show: .", + "length": 125 + }, + { + "text": "Editor's note: Ben Chavis is the co-author with Carey Blakely of \"Crazy Like A Fox: One Principal's Triumph in the Inner City.", + "length": 126 + }, + { + "text": "Unfortunately, the students who decided to attend the school did not receive the academics and structure they so direly needed.", + "length": 127 + }, + { + "text": "That budget, which includes $77 million spent on consultants, means that the district spends an average of $16,270 per student!", + "length": 127 + }, + { + "text": "Currently, he is replicating the model he established at American Indian Public Charter School in various schools throughout the U.", + "length": 131 + }, + { + "text": "Does anyone believe providing more money to these public school systems will enhance these students' academic performance in mathematics?", + "length": 137 + }, + { + "text": "(CNN) -- Teachers unions and politicians are constantly claiming that K-12 public schools need more money in order to produce good academic results.", + "length": 148 + }, + { + "text": "President Obama is moving in the \"right\" direction by reforming public schools to be held responsible to the American public in return for more money.", + "length": 150 + }, + { + "text": "During my principalship at American Indian Public Charter School, we spent less than $8,000 per student, proving that schools did not need more money.", + "length": 150 + }, + { + "text": "I implemented a golden rule at American Indian Public Charter School for staff, students and families: If you act like a winner, you'll be treated like a winner.", + "length": 161 + }, + { + "text": "The Oakland Unified School District had a budget of $602 million for the 
2008-2009 school year, according to Katy Murphy, an education reporter with the Oakland Tribune.", + "length": 169 + }, + { + "text": "\" Chavis received his doctorate in education and philosophy from the University of Arizona and served as principal of American Indian Public Charter School for seven years.", + "length": 172 + }, + { + "text": "Next time you hear school officials or politicians begging for more money, ask them how large the district's budget is and how many students are enrolled in their district.", + "length": 172 + }, + { + "text": "Of 2,506 ninth- and 10th-grade students who took the California Standards test in algebra: 0 percent tested advanced, 3 percent tested proficient and 97 percent failed the test.", + "length": 177 + }, + { + "text": "How is it possible for a public school system to so liberally spend more than half a billion dollars and still fail to educate 94 percent or more students of all racial backgrounds?", + "length": 181 + }, + { + "text": "Yet, we still have a secondary public education system that ranks with Third World countries in preparing our children in English-language arts, mathematics, science and social studies.", + "length": 185 + }, + { + "text": "We served a student population that is on average 98 percent minority, with 97 percent receiving free or reduced-price lunch and many who are non-English speakers and from single-parent families.", + "length": 195 + }, + { + "text": "Of 707 eighth- and ninth-graders who took the California Standard test for general math: 1 percent tested advanced, 5 percent tested proficient and 94 percent failed by testing below grade level.", + "length": 195 + }, + { + "text": "The hard work of these students and staff has paid off with virtually all of our eighth-graders testing advanced in algebra, including 100 percent of our eighth-grade black students, Mexican-American students and American Indian students.", + "length": 238 + }, + { + "text": "; Detroit, Michigan; Los Angeles, 
California; Kansas City, Missouri; and numerous other cities throughout the United States are producing the same poor academic results at an extraordinary cost to the taxpayer and a tremendous academic loss to our students and country.", + "length": 269 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.6060205101966858 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:10.770382979Z", + "first_section_created": "2025-12-23T09:36:10.770758494Z", + "last_section_published": "2025-12-23T09:36:10.771115708Z", + "all_results_received": "2025-12-23T09:36:10.852034969Z", + "output_generated": "2025-12-23T09:36:10.852191775Z", + "total_processing_time_ms": 81, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 80, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:10.770758494Z", + "publish_time": "2025-12-23T09:36:10.771018104Z", + "first_worker_start": "2025-12-23T09:36:10.771516324Z", + "last_worker_end": "2025-12-23T09:36:10.845029Z", + "total_journey_time_ms": 74, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:10.771588627Z", + "start_time": "2025-12-23T09:36:10.77166243Z", + "end_time": "2025-12-23T09:36:10.771746734Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:10.771831Z", + "start_time": "2025-12-23T09:36:10.771965Z", + "end_time": "2025-12-23T09:36:10.845029Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 73 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:10.771434921Z", + "start_time": "2025-12-23T09:36:10.771516324Z", + "end_time": "2025-12-23T09:36:10.771610328Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:10.771556026Z", + "start_time": 
"2025-12-23T09:36:10.771619128Z", + "end_time": "2025-12-23T09:36:10.77166433Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:10.771067706Z", + "publish_time": "2025-12-23T09:36:10.771115708Z", + "first_worker_start": "2025-12-23T09:36:10.771627829Z", + "last_worker_end": "2025-12-23T09:36:10.851223Z", + "total_journey_time_ms": 80, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:10.771642029Z", + "start_time": "2025-12-23T09:36:10.771676331Z", + "end_time": "2025-12-23T09:36:10.771687831Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:10.771984Z", + "start_time": "2025-12-23T09:36:10.772168Z", + "end_time": "2025-12-23T09:36:10.851223Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 79 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:10.771761734Z", + "start_time": "2025-12-23T09:36:10.771922541Z", + "end_time": "2025-12-23T09:36:10.771945942Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:10.771574327Z", + "start_time": "2025-12-23T09:36:10.771627829Z", + "end_time": "2025-12-23T09:36:10.77165473Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 152, + "min_processing_ms": 73, + "max_processing_ms": 79, + "avg_processing_ms": 76, + "median_processing_ms": 79, + "total_queue_wait_ms": 1, + 
"avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2714, + "slowest_section_id": 1, + "slowest_section_time_ms": 80 + } +} diff --git a/data/output/006fac336c8dbb4d22b35d9376281a7be31c4b11.json b/data/output/006fac336c8dbb4d22b35d9376281a7be31c4b11.json new file mode 100644 index 0000000..fca7666 --- /dev/null +++ b/data/output/006fac336c8dbb4d22b35d9376281a7be31c4b11.json @@ -0,0 +1,306 @@ +{ + "file_name": "006fac336c8dbb4d22b35d9376281a7be31c4b11.txt", + "total_words": 770, + "top_n_words": [ + { + "word": "the", + "count": 53 + }, + { + "word": "to", + "count": 30 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "has", + "count": 14 + }, + { + "word": "it", + "count": 13 + }, + { + "word": "for", + "count": 10 + }, + { + "word": "i", + "count": 10 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "speed", + "count": 10 + }, + { + "word": "been", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Carol Driver .", + "length": 14 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "People fear for their safety.", + "length": 29 + }, + { + "text": "The £27m centre was opened in December 2013 around 1.", + "length": 54 + }, + { + "text": "'One driver was clocked doing 63mph through the village.", + "length": 56 + }, + { + "text": "I'm just waiting for them to present something in writing.", + "length": 58 + }, + { + "text": "Straight 
away it had an impact on the speed people drove through the village.", + "length": 77 + }, + { + "text": "'If the council want to take me to court over this one I'm willing to fight them.", + "length": 81 + }, + { + "text": "'It is vital that any scheme introduced is effective and delivers the required result.", + "length": 86 + }, + { + "text": "'Not only has the road become heavy with traffic but there's no regard for speed limits.", + "length": 88 + }, + { + "text": "Warning: Mr Fawcett has been told by the council to remove the camera or face legal action .", + "length": 92 + }, + { + "text": "Fake camera: Chris Fawcett installed the replica on the A303 to encourage tourists to slow down .", + "length": 97 + }, + { + "text": "'I have had villagers coming up to me to thank me for what I've done because it's had such an effect.", + "length": 101 + }, + { + "text": "5 miles from the site of Stonehenge, a prehistoric monument which forms part of a World Heritage Site.", + "length": 102 + }, + { + "text": "They claim it is on their land but I have seen my next door neighbour's deeds which show the land is his.", + "length": 105 + }, + { + "text": "'The council says it has a robust approach to tackling speeding but they haven't done anything to stop it.", + "length": 106 + }, + { + "text": "The speed camera was built to deter tourists from speeding along back roads to Stonehenge visitors centre .", + "length": 107 + }, + { + "text": "The £27m Stonehenge visitors centre has already attracted 500,000 tourists since it opened six months ago .", + "length": 108 + }, + { + "text": "There have been numerous crashes in the last six months and several cats have been killed after being hit by cars.", + "length": 114 + }, + { + "text": "'I built a dummy speed camera with my eight-year-old son Jake and we put it up on land owned by my next door neighbour.", + "length": 119 + }, + { + "text": "'Everyone who lives here is fed up of it so I decided to take matters into my own 
hands before someone got seriously hurt.", + "length": 122 + }, + { + "text": "The council would ask everyone to follow the approved procedure to ensure the best scheme is delivered to meet the community needs.", + "length": 131 + }, + { + "text": "'Then I got a visit from someone from the council threatening me with legal action if I don't take the camera down within seven days.", + "length": 133 + }, + { + "text": "' A spokesman for Wiltshire Council said: 'The council has a robust system for dealing with speeding issues raised by local communities.", + "length": 136 + }, + { + "text": "'I cannot fathom why, if this simple safety measure is working, they are intent on taking it down yet they aren't doing anything about it themselves.", + "length": 149 + }, + { + "text": "Villagers who built a fake speed camera in a desperate bid to slow traffic caused by the new Stonehenge visitors centre have been threatened with legal action.", + "length": 159 + }, + { + "text": "However Wiltshire Council claims the dummy device has been put up illegally on land they own and it has to be taken down because it was not officially installed.", + "length": 161 + }, + { + "text": "'The considerations to deal with speeding issues will include the use of the volunteer speedwatch scheme, installation of speed indicator devices or other direct measures.", + "length": 171 + }, + { + "text": "Residents of Shrewton, which runs parallel to the A303, reported a dramatic spike in the amount of drivers diverting through the village and ignoring its 30mph speed limit.", + "length": 172 + }, + { + "text": "Chris Fawcett has been warned to remove the home-made replica camera from beside a narrow road through the Shrewton, Wiltshire, within seven days or face court proceedings.", + "length": 172 + }, + { + "text": "Mr Fawcett, a father of three, said the fake speed camera - made from an old Ikea cabinet painted yellow - has been a success and he has been heaped with praise by residents.", + 
"length": 174 + }, + { + "text": "'Issues should be referred to the local community area board, who will work with the local community to examine what speed control measures are appropriate for that particular road.", + "length": 181 + }, + { + "text": "'We have seen traffic levels through the village rise significantly to the point where people who live here are worried to walk down the road for fear of being hit by a speeding car.", + "length": 182 + }, + { + "text": "As part of the development the A334, which ran directly past the site, was closed and dug up, forcing all visitor traffic onto the A303, the main commuter route form the M5 to the M3 motorways.", + "length": 193 + }, + { + "text": "Mr Fawcett, 49, a car mechanic, said: 'Since the visitors centre has been open the A303 has become incredibly congested and that in turn has made the road through Shrewton into a rat run for drivers trying to dodge queues.", + "length": 222 + }, + { + "text": "Mr Fawcett built the speed camera in a last-ditch effort to slow daytrippers attempting to dodge congestion on the main A303 road brought about by the Stonehenge visitors centre, which has seen 500,000 tourists in six months.", + "length": 225 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8010868430137634 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:11.27135579Z", + "first_section_created": "2025-12-23T09:36:11.271760206Z", + "last_section_published": "2025-12-23T09:36:11.271954214Z", + "all_results_received": "2025-12-23T09:36:11.34124731Z", + "output_generated": "2025-12-23T09:36:11.341441318Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:11.271760206Z", + "publish_time": "2025-12-23T09:36:11.271954214Z", + "first_worker_start": "2025-12-23T09:36:11.272476635Z", + 
"last_worker_end": "2025-12-23T09:36:11.339122Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:11.272430733Z", + "start_time": "2025-12-23T09:36:11.272500736Z", + "end_time": "2025-12-23T09:36:11.272581239Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:11.272654Z", + "start_time": "2025-12-23T09:36:11.2728Z", + "end_time": "2025-12-23T09:36:11.339122Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:11.272410932Z", + "start_time": "2025-12-23T09:36:11.272476635Z", + "end_time": "2025-12-23T09:36:11.272565239Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:11.272494836Z", + "start_time": "2025-12-23T09:36:11.272550638Z", + "end_time": "2025-12-23T09:36:11.272580639Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 
1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4339, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/006fca550042888454e7261eb578ea37febed6d1.json b/data/output/006fca550042888454e7261eb578ea37febed6d1.json new file mode 100644 index 0000000..8453a0e --- /dev/null +++ b/data/output/006fca550042888454e7261eb578ea37febed6d1.json @@ -0,0 +1,246 @@ +{ + "file_name": "006fca550042888454e7261eb578ea37febed6d1.txt", + "total_words": 500, + "top_n_words": [ + { + "word": "the", + "count": 40 + }, + { + "word": "to", + "count": 18 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "a", + "count": 7 + }, + { + "word": "said", + "count": 7 + }, + { + "word": "they", + "count": 7 + }, + { + "word": "tribunal", + "count": 7 + }, + { + "word": "accused", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "Use your counsel to make your case and zealously protect your rights.", + "length": 69 + }, + { + "text": "\"We will conduct trials based on a firm presumption of innocence of the accused.", + "length": 80 + }, + { + "text": "\" If they can't afford lawyers, tribunal funds are available for hiring legal counsel.", + "length": 86 + }, + { + "text": "Supporters say he was killed because of his opposition to Syrian influence in Lebanon.", + "length": 86 + }, + { + "text": "\"The march to justice is inexorable, and one way or another we will end up with a trial.", + "length": 88 + }, + { + "text": "\" \"We are only acting in the interest of Lebanon; our only motivation is the pursuit of justice.", + "length": 96 + }, + { + "text": "The Tribunal shall never convict anybody unless guilt is established beyond any reasonable doubt,\" he said.", + 
"length": 107 + }, + { + "text": "As for Lebanese authorities, the judge said he's hopeful they will \"persist in their search for the accused.", + "length": 108 + }, + { + "text": "Cassese said the tribunal \"will appoint the best professionals to represent them in court\" in their absence.", + "length": 108 + }, + { + "text": "Cassese defended the tribunal, saying its personnel \"are doing their job with full independence and impartiality.", + "length": 113 + }, + { + "text": "Hariri, a wealthy entrepreneur turned politician, died when his motorcade was hit by a bomb in Beirut on February 14, 2005.", + "length": 123 + }, + { + "text": "Arrest warrants were issued for Salim Jamil Ayyash, Mustafa Amine Badreddine, Hussein Hassan Oneissi, and Assad Hassan Sabra.", + "length": 125 + }, + { + "text": "But he urged them to appoint legal counsel and pass along instructions to them, even if they choose not to appear before the court.", + "length": 131 + }, + { + "text": "Cassese issued the statement after Lebanese authorities told him they have been unable so far to serve warrants on and arrest the accused.", + "length": 138 + }, + { + "text": "His death prompted mass protests that led to the withdrawal of Syrian troops from Lebanon, who had been in the country for nearly 30 years.", + "length": 139 + }, + { + "text": "\" The judge said that if the accused don't wish to come before the tribunal at The Hague in the Netherlands, they can participate by video link.", + "length": 144 + }, + { + "text": "\"If you believe this Tribunal is illegal or illegitimate, argue this point through legal counsel chosen by you -- you will thus have your voice heard on this issue.", + "length": 164 + }, + { + "text": "(CNN) -- A top judge has issued a special plea to the four suspects named in the killing of former Lebanese Prime Minister Rafik Hariri killing to come before the court.", + "length": 169 + }, + { + "text": "A highly placed source in the Lebanese army, who had correctly given 
CNN the names of the suspects previously, has said that all four belong to Hezbollah, the Lebanese Shiite militant group.", + "length": 190 + }, + { + "text": "Our exclusive aim is to find the truth about the assassination of 14 February 2005 and other possibly connected criminal cases, while upholding the highest international standards of criminal law.", + "length": 196 + }, + { + "text": "I therefore strongly appeal to the accused to take advantage of the broad legal possibilities offered by our Rules of Procedure and Evidence, thereby contributing to the establishment of truth and the conduct of fair proceedings,\" he said.", + "length": 239 + }, + { + "text": "Judge Antonio Cassese, the president of the Special Tribunal for Lebanon, said in an open letter to the four men accused in the 2005 attack that they will be treated fairly if they appear before the court or even participate in the trial proceedings without being present.", + "length": 272 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5750579237937927 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:11.772741422Z", + "first_section_created": "2025-12-23T09:36:11.774043875Z", + "last_section_published": "2025-12-23T09:36:11.774234983Z", + "all_results_received": "2025-12-23T09:36:11.832961452Z", + "output_generated": "2025-12-23T09:36:11.833126659Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:11.774043875Z", + "publish_time": "2025-12-23T09:36:11.774234983Z", + "first_worker_start": "2025-12-23T09:36:11.774809106Z", + "last_worker_end": "2025-12-23T09:36:11.832Z", + "total_journey_time_ms": 57, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:11.774810706Z", + "start_time": "2025-12-23T09:36:11.774870408Z", + 
"end_time": "2025-12-23T09:36:11.77492231Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:11.77499Z", + "start_time": "2025-12-23T09:36:11.775129Z", + "end_time": "2025-12-23T09:36:11.832Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:11.774753404Z", + "start_time": "2025-12-23T09:36:11.774861708Z", + "end_time": "2025-12-23T09:36:11.775000414Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:11.774742703Z", + "start_time": "2025-12-23T09:36:11.774809106Z", + "end_time": "2025-12-23T09:36:11.774843007Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2990, + 
"slowest_section_id": 0, + "slowest_section_time_ms": 57 + } +} diff --git a/data/output/006fd87268698ecbd1344f3b0666f9fa74f43557.json b/data/output/006fd87268698ecbd1344f3b0666f9fa74f43557.json new file mode 100644 index 0000000..f456c75 --- /dev/null +++ b/data/output/006fd87268698ecbd1344f3b0666f9fa74f43557.json @@ -0,0 +1,452 @@ +{ + "file_name": "006fd87268698ecbd1344f3b0666f9fa74f43557.txt", + "total_words": 936, + "top_n_words": [ + { + "word": "the", + "count": 60 + }, + { + "word": "to", + "count": 28 + }, + { + "word": "of", + "count": 24 + }, + { + "word": "skin", + "count": 20 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "a", + "count": 16 + }, + { + "word": "sunbeds", + "count": 14 + }, + { + "word": "i", + "count": 11 + }, + { + "word": "cancer", + "count": 10 + }, + { + "word": "damage", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Cancer .", + "length": 8 + }, + { + "text": "any more.", + "length": 9 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "appearance.", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "It was scary.", + "length": 13 + }, + { + "text": "Anna Hodgekiss .", + "length": 16 + }, + { + "text": "right across the face.", + "length": 22 + }, + { + "text": "'It really made me think.", + "length": 25 + }, + { + "text": "Newcastle, Leeds and Essex.", + "length": 27 + }, + { + "text": "11:04 EST, 7 February 2013 .", + "length": 28 + }, + { + "text": "07:18 EST, 8 February 2013 .", + "length": 28 + }, + { + "text": "Clusters of speckled areas are seen .", + "length": 37 + }, + { + "text": "As well as the public being able to .", + "length": 37 + }, + { + "text": "The skin scan highlights such things .", + "length": 38 + }, + { + "text": "It was taken by a pioneering UV skin .", + "length": 38 + }, + { + "text": "Experts hope that their campaign will .", + "length": 39 + }, + 
{ + "text": "The results of the scan were a shocker.", + "length": 39 + }, + { + "text": "I have been looking online for a bronzer.", + "length": 41 + }, + { + "text": "nose, sides of face and forehead have not.", + "length": 42 + }, + { + "text": "She said the results of the scan were shocking .", + "length": 48 + }, + { + "text": "Her top lip has escaped but the cheeks, bridge of .", + "length": 51 + }, + { + "text": "Fine and coarse wrinkles are seen on sun damaged skin.", + "length": 54 + }, + { + "text": "beneath the surface when people lay under the machines.", + "length": 55 + }, + { + "text": "this month at major shopping centres in Sheffield, Manchester, .", + "length": 64 + }, + { + "text": "are glad to be bringing R UV UGLY back to England this February.", + "length": 64 + }, + { + "text": "scanner which shows up in stark detail the extreme damage caused .", + "length": 66 + }, + { + "text": "inspired by shows such as The Only Way Is Essex to get a permatan.", + "length": 66 + }, + { + "text": "help highlight the dangers of sunbeds to youngsters, who are often .", + "length": 68 + }, + { + "text": "as pigmentation and premature wrinkles, caused by overexposure to UV .", + "length": 70 + }, + { + "text": "rays from sunbeds, a turn off for all young women worried about their .", + "length": 71 + }, + { + "text": "Research UK's senior health campaigns manager Caroline Cerny said: 'We .", + "length": 72 + }, + { + "text": "visit sk:n clinics, Cancer Research UK will be taking R UV UGLY on tour .", + "length": 73 + }, + { + "text": "I was the oldest girl there and the UV damage was much worse than the others.", + "length": 77 + }, + { + "text": "'When I look back at my university years I was tanned pretty much all the time.", + "length": 79 + }, + { + "text": "Sometimes these patches become irregular and produce what are known as lentigos.", + "length": 80 + }, + { + "text": "I have not had any sunbeds since - not for Christmas parties and not for New Year.", 
+ "length": 82 + }, + { + "text": "Kelly started using sunbeds at the local gym when she was 16 to maintain a holiday glow.", + "length": 88 + }, + { + "text": "It may also become patchy, or mottled darker patches of increased pigmentation may appear.", + "length": 90 + }, + { + "text": "This shocking image shows the cosmetic damage done to a woman's face after 14 years of using sunbeds.", + "length": 101 + }, + { + "text": "'Our model in the picture is just a member of the public who had been using sunbeds not even that much.", + "length": 103 + }, + { + "text": "' Skin expert Dr Askari Townshend, who reviewed the X-rays, said: 'Kelly has easily the greatest degree of damage.", + "length": 114 + }, + { + "text": "Malignant melanoma, the most serious form of skin cancer, is the second most common cancer amongst 15 to 34 year olds.", + "length": 118 + }, + { + "text": "The initiative aims to stave off the temptation to hit the sunbeds this winter by showing the ugly truth beneath the tan.", + "length": 121 + }, + { + "text": "Figures show using a sunbed just once or more a month could increase the chance of developing skin cancer by 50 per cent.", + "length": 121 + }, + { + "text": "'We have seen many young people know about the risks of sunbed use, but still believe they make them look and feel better.", + "length": 122 + }, + { + "text": "The in-depth picture shows clusters of unsightly damage under 30-year old Kelly Hughes' features which can lead to cancer.", + "length": 122 + }, + { + "text": "'No one wants to look older before their time, so we hope the skin scan will really help to change people's minds about using sunbeds.", + "length": 134 + }, + { + "text": "Spokesman David Djukic said: 'You may look perfectly fine on the surface but what the scanner does is show up what is going on underneath.", + "length": 138 + }, + { + "text": "In areas around the back of the neck, a leathery texture and coarseness with deep lines is frequently seen, the skin may feel 
dry and scaly.", + "length": 140 + }, + { + "text": "UV rays from sunbeds or over-exposure to the sun can damage the skin's DNA and, over time, this damage can build up and lead to skin cancer .", + "length": 141 + }, + { + "text": "This UV scan shows that after 14 years of using sunbeds, 30-year-old Kelly Hughes has clusters of unsightly pockmarks which can lead to cancer .", + "length": 144 + }, + { + "text": "Chronic sun exposure will also produce a thickened layer in the upper dermis known as solar elastosis, which gives a yellowish chicken skin look.", + "length": 145 + }, + { + "text": "Kelly, an account manager from Beaconsfield, Bucks, said: 'I started using sunbeds at the local gym when I was 16 to maintain that holiday feel .", + "length": 145 + }, + { + "text": "The initiative aims to stave off the temptation to hit the sunbeds this winter by exposing the often unnoticed, long-term skin damage sunbeds can cause .", + "length": 153 + }, + { + "text": "This month Cancer Research UK is raising awareness of the danger of sunbed use with a campaign entitled 'R UV UGLY', backed by leading skin specialists sk:n clinics.", + "length": 165 + }, + { + "text": "The campaign will see experts touring shopping centres with the machines, which offer in depth images of faces, to let people see first-hand how damaged their skin might be.", + "length": 173 + }, + { + "text": "'I would have more sessions in the winter as I did not want to look pale - my skin is freckly and moley so I knew I could be at risk of skin cancer but did not really want to think about that.", + "length": 192 + }, + { + "text": "It's hoped the campaign will highlight the dangers of sunbeds to youngsters often wanting to emulate the tans of The Only Way Is Essex stars, such as Sam Faiers (left) and Mark Wright and Lauren Goodger .", + "length": 204 + }, + { + "text": "It calls on people across the nation to face the damage, often invisible to the naked eye, being inflicted on their skin in pursuit 
of a tan, by offering free skin assessments at sk:n clinics across England.", + "length": 207 + }, + { + "text": "'Working with sk:n, this initiative will allow people to see the cosmetic damage from sunbed use, showing every time someone uses a sunbed they are damaging their skin, making it look worse in the long run and making skin coarse, leathery and wrinkled.", + "length": 252 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5832784622907639 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:12.274874885Z", + "first_section_created": "2025-12-23T09:36:12.276944169Z", + "last_section_published": "2025-12-23T09:36:12.277290483Z", + "all_results_received": "2025-12-23T09:36:12.365008222Z", + "output_generated": "2025-12-23T09:36:12.365230431Z", + "total_processing_time_ms": 90, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 87, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:12.276944169Z", + "publish_time": "2025-12-23T09:36:12.277161877Z", + "first_worker_start": "2025-12-23T09:36:12.277595395Z", + "last_worker_end": "2025-12-23T09:36:12.363861Z", + "total_journey_time_ms": 86, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:12.277646097Z", + "start_time": "2025-12-23T09:36:12.277736501Z", + "end_time": "2025-12-23T09:36:12.277836605Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:12.277925Z", + "start_time": "2025-12-23T09:36:12.278099Z", + "end_time": "2025-12-23T09:36:12.363861Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 85 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:12.277555693Z", + "start_time": "2025-12-23T09:36:12.277627696Z", + "end_time": "2025-12-23T09:36:12.277757101Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:12.277518292Z", + "start_time": "2025-12-23T09:36:12.277595395Z", + "end_time": "2025-12-23T09:36:12.277648397Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:12.27723058Z", + "publish_time": "2025-12-23T09:36:12.277290483Z", + "first_worker_start": "2025-12-23T09:36:12.277734Z", + "last_worker_end": "2025-12-23T09:36:12.325575Z", + "total_journey_time_ms": 48, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:12.277759701Z", + "start_time": "2025-12-23T09:36:12.277782902Z", + "end_time": "2025-12-23T09:36:12.277791503Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:12.278071Z", + "start_time": "2025-12-23T09:36:12.278195Z", + "end_time": "2025-12-23T09:36:12.325575Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 47 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:12.2777224Z", + "start_time": "2025-12-23T09:36:12.277755401Z", + "end_time": "2025-12-23T09:36:12.277764402Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:12.277675298Z", + "start_time": "2025-12-23T09:36:12.277734Z", + "end_time": "2025-12-23T09:36:12.277738501Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 132, + 
"min_processing_ms": 47, + "max_processing_ms": 85, + "avg_processing_ms": 66, + "median_processing_ms": 85, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2606, + "slowest_section_id": 0, + "slowest_section_time_ms": 86 + } +} diff --git a/data/output/00711381cc1b5f7644d97d4a0829244fbdbd3d57.json b/data/output/00711381cc1b5f7644d97d4a0829244fbdbd3d57.json new file mode 100644 index 0000000..8ca53b9 --- /dev/null +++ b/data/output/00711381cc1b5f7644d97d4a0829244fbdbd3d57.json @@ -0,0 +1,326 @@ +{ + "file_name": "00711381cc1b5f7644d97d4a0829244fbdbd3d57.txt", + "total_words": 726, + "top_n_words": [ + { + "word": "the", + "count": 33 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "of", + "count": 13 + }, + { + "word": "s", + "count": 13 + }, + { + "word": "turbulence", + "count": 13 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "said", + "count": 11 + }, + { + "word": "passengers", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "Babies cry.", + "length": 11 + }, + { + "text": "The squeamish gasp.", + "length": 19 + }, + { + "text": "Then, usually, it's all over.", + "length": 29 + }, + { + "text": "Sometimes overhead bins fly open.", + "length": 33 + }, + { + "text": "Learn more about airline turbulence » .", + "length": 40 + }, + { + "text": "And, on rare occasions, it can be 
deadly.", + "length": 41 + }, + { + "text": "In-flight turbulence is often a mere inconvenience.", + "length": 51 + }, + { + "text": "Most of the time, discomfort is the worst byproduct.", + "length": 52 + }, + { + "text": "Police said 26 passengers were injured, four seriously.", + "length": 55 + }, + { + "text": "Flight attendants have been hurt at a much higher rate.", + "length": 55 + }, + { + "text": "(CNN) -- It's a moment familiar to any regular air traveler.", + "length": 60 + }, + { + "text": "\"You can't see it; you can't sense it with radar,\" Dorr said.", + "length": 61 + }, + { + "text": "The plane bucks up and down, lurches forward and back, or both.", + "length": 63 + }, + { + "text": "Passengers said most of those injured were not wearing seat belts.", + "length": 66 + }, + { + "text": "\"The majority of injuries actually happen to flight attendants,\" Dorr said.", + "length": 75 + }, + { + "text": "\" And wearing seat belts, aviation officials say, is the best way to stay safe.", + "length": 79 + }, + { + "text": "\"They have to be up performing their tasks, even when the seat-belt light is on.", + "length": 80 + }, + { + "text": "\" And while pilots are almost always strapped in with seat belts, even they aren't exempt.", + "length": 90 + }, + { + "text": "A passenger's photo shows oxygen masks hanging from the ceiling on Monday's Continental Flight 128.", + "length": 99 + }, + { + "text": "\"Very rarely does it hurt passengers, which is odd because a lot of them don't keep their seat belts on.", + "length": 104 + }, + { + "text": "Turbulence, according to the Federal Aviation Administration, is caused by a quick change in air movement.", + "length": 106 + }, + { + "text": "\" Since 1980, three people have been killed in turbulence-related accidents, according to the administration.", + "length": 109 + }, + { + "text": "All licensed pilots also receive \"upset training,\" which teaches how to deal with extreme turbulence, he said.", + "length": 
110 + }, + { + "text": "\"Normally, it's an inconvenience,\" said Kevin Garrison, a retired Delta Air Lines pilot living in Lexington, Kentucky.", + "length": 118 + }, + { + "text": "But it's also the leading cause of airline passenger injuries that are not associated with a fatal crash, experts say.", + "length": 118 + }, + { + "text": "The rapid shift in gravity force -- or G-force -- can cause a sensation not unlike being whipped around on a roller coaster.", + "length": 124 + }, + { + "text": "At least two of those deaths involved passengers who reportedly were not wearing seat belts while the seat-belt sign was on.", + "length": 124 + }, + { + "text": "\" Dorr said passenger injury numbers have dropped over the past few years, during an industry-wide effort to increase safety.", + "length": 125 + }, + { + "text": "Initial reports are that the Monday flight was hit by what's called clear-air turbulence, or air pockets that hit without warning.", + "length": 130 + }, + { + "text": "In the case of Monday's Continental Flight 128, an unexpected blast of air led to much more than jangled nerves, bumps and bruises.", + "length": 131 + }, + { + "text": "Jet streams, air shooting off of mountains, cold or warm weather fronts and thunderstorms can all cause changes in speed or direction.", + "length": 134 + }, + { + "text": "During that same time period, 184 attendants were seriously injured, despite their numbers being far smaller than the number of passengers.", + "length": 139 + }, + { + "text": "There have been 234 turbulence-related accidents since 1980, and 114 passengers were seriously injured in those accidents, the FAA reported.", + "length": 140 + }, + { + "text": "\"The best way to determine if there's a possibility of clear-air turbulence is to have somebody in front of you that's already flown through it.", + "length": 144 + }, + { + "text": "\"In the event that something happens,\" said Les Dorr, a spokesman for the FAA, \"that's the best advice that we 
or anyone else can give passengers.", + "length": 146 + }, + { + "text": "The Commercial Aviation Safety Team has focused on seat-belt awareness and pushed for better communication systems for reporting turbulence, Dorr said.", + "length": 151 + }, + { + "text": "In Monday's accident, passengers reported they were slammed into the Boeing 767's ceiling -- some said two or more times -- when the plane dropped rapidly.", + "length": 155 + }, + { + "text": "\"I've had a few bloodied heads -- when I hit my head on the overhead when the seat belt was a little loose,\" said Garrison, who flew for Delta for 27 years.", + "length": 156 + }, + { + "text": "While pilots are always on the lookout for regular turbulence spots -- like mountain ranges and places where weather fronts are converging -- clear-air turbulence can happen anywhere.", + "length": 183 + }, + { + "text": "No passengers were reported seriously injured in turbulence incidents in 2008, and five flight attendants -- down from a high of 19 in 2003 -- received serious injuries, according to the FAA.", + "length": 191 + }, + { + "text": "\"I saw people being thrown to the roof as if they were dolls,\" Fabio Ottolini, who was returning to Houston with his wife and daughter after visiting family in Brazil, told CNN affiliate KTRK-TV.", + "length": 195 + }, + { + "text": "Turbulence struck the flight, from Rio de Janeiro, Brazil, to Houston, Texas, unexpectedly, injuring seven passengers badly enough to require hospitalization after the pilot diverted the flight to Miami, Florida.", + "length": 212 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5694901943206787 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:12.778068391Z", + "first_section_created": "2025-12-23T09:36:12.778460406Z", + "last_section_published": "2025-12-23T09:36:12.778679815Z", + "all_results_received": "2025-12-23T09:36:12.846137437Z", + "output_generated": 
"2025-12-23T09:36:12.846338345Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:12.778460406Z", + "publish_time": "2025-12-23T09:36:12.778679815Z", + "first_worker_start": "2025-12-23T09:36:12.779227537Z", + "last_worker_end": "2025-12-23T09:36:12.845246Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:12.779193036Z", + "start_time": "2025-12-23T09:36:12.779268239Z", + "end_time": "2025-12-23T09:36:12.779354942Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:12.779429Z", + "start_time": "2025-12-23T09:36:12.779585Z", + "end_time": "2025-12-23T09:36:12.845246Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:12.779207436Z", + "start_time": "2025-12-23T09:36:12.77928674Z", + "end_time": "2025-12-23T09:36:12.779378243Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:12.779151734Z", + "start_time": "2025-12-23T09:36:12.779227537Z", + "end_time": "2025-12-23T09:36:12.779263239Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 
65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4342, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/00714adda49a39734eca286799750ebd39387f5c.json b/data/output/00714adda49a39734eca286799750ebd39387f5c.json new file mode 100644 index 0000000..6ae8751 --- /dev/null +++ b/data/output/00714adda49a39734eca286799750ebd39387f5c.json @@ -0,0 +1,278 @@ +{ + "file_name": "00714adda49a39734eca286799750ebd39387f5c.txt", + "total_words": 487, + "top_n_words": [ + { + "word": "the", + "count": 36 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "in", + "count": 12 + }, + { + "word": "be", + "count": 10 + }, + { + "word": "a", + "count": 9 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "greece", + "count": 8 + }, + { + "word": "i", + "count": 8 + }, + { + "word": "ranieri", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "7.", + "length": 2 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Tony Jimenez .", + "length": 14 + }, + { + "text": "Must do better?", + "length": 15 + }, + { + "text": "'It's a great challenge.", + "length": 24 + }, + { + "text": "' Ranieri was sacked by Monaco in May.", + "length": 38 + }, + { + "text": "'You represent the whole team and the country.", + "length": 46 + }, + { + "text": "'The next captain will be selected by elections.", + "length": 48 + }, + { 
+ "text": "'I came here to take the team as high as possible.", + "length": 50 + }, + { + "text": "CLICK HERE to start picking your Fantasy Football team NOW!", + "length": 59 + }, + { + "text": "There’s £60,000 in prizes including £1,000 up for grabs EVERY WEEK… .", + "length": 75 + }, + { + "text": "'I will be telling the players that being captain is very important,' said Ranieri.", + "length": 83 + }, + { + "text": "Greece is the word: Claudio Ranieri has signed a two-year contract to manage Greece .", + "length": 85 + }, + { + "text": "Ranieri's first match in charge will be a home Euro 2016 qualifier against Romania on Sept.", + "length": 91 + }, + { + "text": "' Au revoir: Claudio Ranieri received a £4million pay off sacked as Monaco manager in May .", + "length": 92 + }, + { + "text": "Greece celebrate victory over Ivory Coast on their way to the last 16 of the 2014 World Cup .", + "length": 93 + }, + { + "text": "The individual should have the charisma to be able to bear the burden of having such a title.", + "length": 93 + }, + { + "text": "Well travelled: The Italian has managed numerous European clubs including Chelsea 2000 - 2004 .", + "length": 95 + }, + { + "text": "Northern Ireland, Hungary, Finland and Faroe Islands provide the rest of the opposition in Group F.", + "length": 99 + }, + { + "text": "Under Santos, Greece reached the last 16 for the first time before losing to Costa Rica on penalties.", + "length": 101 + }, + { + "text": "The new captain will be chosen by the squad after the talismanic Giorgios Karagounis decided to retire.", + "length": 103 + }, + { + "text": "The tie will be played behind closed doors after 2004 champions Greece were punished for previous crowd disturbances.", + "length": 117 + }, + { + "text": "Claudio Ranieri has promised to give young players a chance after being officially presented as the new coach of Greece.", + "length": 120 + }, + { + "text": "Our immediate aim is to qualify for Euro 2016 in France - it 
won't be easy but we have the potential and the talent to succeed.", + "length": 127 + }, + { + "text": "It’s not too late to play MailOnline Fantasy Football… There’s £1,000 to be won EVERY WEEK by the highest scoring manager .", + "length": 130 + }, + { + "text": "'I'm very happy to coach the national team of Greece and, although I had other proposals, I chose this one operating on instinct,' Ranieri told a news conference on Friday.", + "length": 172 + }, + { + "text": "'I am very interested in working with the younger players, who are the future of the team, and I will be in contact with the clubs in an effort to work with them on a regular basis.", + "length": 181 + }, + { + "text": "Ranieri has kept faith with the bulk of the squad that represented Greece at the World Cup and added several youngsters such as 21-year-old Olympiakos Piraeus striker Dimitris Diamantakos.", + "length": 188 + }, + { + "text": "The much-travelled Italian signed a two-year deal in July to have his first crack at international coaching after the contract of his predecessor Fernando Santos expired following the World Cup in Brazil.", + "length": 204 + }, + { + "text": "I believe in young players and there will be ample opportunities for them to show their potential,' added the 62-year-old former coach of Monaco, Chelsea, Valencia, Parma, Juventus, AS Roma and Inter Milan.", + "length": 206 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.44055062532424927 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:13.279460123Z", + "first_section_created": "2025-12-23T09:36:13.279906241Z", + "last_section_published": "2025-12-23T09:36:13.280101149Z", + "all_results_received": "2025-12-23T09:36:13.33935314Z", + "output_generated": "2025-12-23T09:36:13.339519047Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + 
"section_id": 0, + "creation_time": "2025-12-23T09:36:13.279906241Z", + "publish_time": "2025-12-23T09:36:13.280101149Z", + "first_worker_start": "2025-12-23T09:36:13.280663772Z", + "last_worker_end": "2025-12-23T09:36:13.338506Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:13.280643671Z", + "start_time": "2025-12-23T09:36:13.280706774Z", + "end_time": "2025-12-23T09:36:13.280768376Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:13.280873Z", + "start_time": "2025-12-23T09:36:13.28101Z", + "end_time": "2025-12-23T09:36:13.338506Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:13.280598069Z", + "start_time": "2025-12-23T09:36:13.280663772Z", + "end_time": "2025-12-23T09:36:13.280741775Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:13.280674472Z", + "start_time": "2025-12-23T09:36:13.280747375Z", + "end_time": "2025-12-23T09:36:13.280778876Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 
0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2782, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/0071739400e34d34f53c075502a4dbdd47a5f686.json b/data/output/0071739400e34d34f53c075502a4dbdd47a5f686.json new file mode 100644 index 0000000..b8b9c4b --- /dev/null +++ b/data/output/0071739400e34d34f53c075502a4dbdd47a5f686.json @@ -0,0 +1,302 @@ +{ + "file_name": "0071739400e34d34f53c075502a4dbdd47a5f686.txt", + "total_words": 333, + "top_n_words": [ + { + "word": "the", + "count": 28 + }, + { + "word": "s", + "count": 12 + }, + { + "word": "u", + "count": 12 + }, + { + "word": "russian", + "count": 11 + }, + { + "word": "aircraft", + "count": 9 + }, + { + "word": "in", + "count": 7 + }, + { + "word": "military", + "count": 7 + }, + { + "word": "about", + "count": 6 + }, + { + "word": "flew", + "count": 6 + }, + { + "word": "over", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "On both days, U.", + "length": 16 + }, + { + "text": "aircraft carrier.", + "length": 17 + }, + { + "text": "military officials.", + "length": 19 + }, + { + 
"text": "military officials.", + "length": 19 + }, + { + "text": "ships in the past year.", + "length": 23 + }, + { + "text": "Russian long-range flights skirting U.", + "length": 38 + }, + { + "text": "The last time Russian planes flew over a U.", + "length": 43 + }, + { + "text": "military officials said that in both cases, U.", + "length": 46 + }, + { + "text": "ships and flew alongside them until they left the area.", + "length": 55 + }, + { + "text": "The Bears overflew the ships at about 2,000 feet, officials said.", + "length": 65 + }, + { + "text": "or other nations' boundaries have also been common over the last year.", + "length": 70 + }, + { + "text": "WASHINGTON (CNN) -- Russian military aircraft flew just 500 feet over two U.", + "length": 76 + }, + { + "text": "Two Russian Ilyushin IL-38 maritime patrol aircraft flew only 500 feet above a U.", + "length": 81 + }, + { + "text": "aircraft carrier Stennis while it was in international waters in the Sea of Japan.", + "length": 82 + }, + { + "text": "Navy F/A-18 fighters met up with the Russian aircraft about 70 nautical miles from the U.", + "length": 89 + }, + { + "text": "On Monday, two Russian Ilyushin IL-38 maritime patrol aircraft, known as \"Mays,\" overflew the U.", + "length": 96 + }, + { + "text": "The Russian aircraft flew about 500 feet over the ship, lower than other flights the Russians have made over U.", + "length": 111 + }, + { + "text": "Navy ship was February 2008, when two Bears flew 2,000 feet over the aircraft carrier USS Nimitz south of Japan.", + "length": 112 + }, + { + "text": "Navy ships this week as the ships participated in a joint military exercise with South Korea in the Sea of Japan, according to U.", + "length": 129 + }, + { + "text": "The USS Stennis was about 80 miles east of Pohang, South Korea, participating in the joint military exercise when the flyover occurred.", + "length": 135 + }, + { + "text": "aircraft tried contacting the Russian planes on international air 
frequency radio channels, but the Russian pilots did not respond, officials said.", + "length": 147 + }, + { + "text": "On Tuesday, the USS Blue Ridge, a lead command and control ship, and the Stennis were overflown by two Russian \"Bear\" long-range bombers multiple times, according to U.", + "length": 168 + }, + { + "text": "Although the Pentagon does not often talk about the overflights, there is nothing illegal about the actions, and they are generally seen by the United States as nothing more than muscle-flexing by the Russian military.", + "length": 218 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5534682273864746 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:13.780883457Z", + "first_section_created": "2025-12-23T09:36:13.78121007Z", + "last_section_published": "2025-12-23T09:36:13.781387378Z", + "all_results_received": "2025-12-23T09:36:13.849905543Z", + "output_generated": "2025-12-23T09:36:13.850041548Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:13.78121007Z", + "publish_time": "2025-12-23T09:36:13.781387378Z", + "first_worker_start": "2025-12-23T09:36:13.782043804Z", + "last_worker_end": "2025-12-23T09:36:13.84896Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:13.782108307Z", + "start_time": "2025-12-23T09:36:13.782152909Z", + "end_time": "2025-12-23T09:36:13.78219521Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:13.782199Z", + "start_time": "2025-12-23T09:36:13.782363Z", + "end_time": "2025-12-23T09:36:13.84896Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + 
"receive_time": "2025-12-23T09:36:13.782058105Z", + "start_time": "2025-12-23T09:36:13.782123507Z", + "end_time": "2025-12-23T09:36:13.78217941Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:13.781987602Z", + "start_time": "2025-12-23T09:36:13.782043804Z", + "end_time": "2025-12-23T09:36:13.782064805Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1904, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/0071845937b319940dcbc041ae84f035f6ea21c3.json b/data/output/0071845937b319940dcbc041ae84f035f6ea21c3.json new file mode 100644 index 0000000..449ab43 --- /dev/null +++ b/data/output/0071845937b319940dcbc041ae84f035f6ea21c3.json @@ -0,0 +1,404 @@ +{ + "file_name": 
"0071845937b319940dcbc041ae84f035f6ea21c3.txt", + "total_words": 908, + "top_n_words": [ + { + "word": "the", + "count": 94 + }, + { + "word": "to", + "count": 26 + }, + { + "word": "a", + "count": 24 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "his", + "count": 16 + }, + { + "word": "boy", + "count": 15 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "as", + "count": 12 + }, + { + "word": "he", + "count": 12 + }, + { + "word": "and", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "on his tricycle.", + "length": 16 + }, + { + "text": "' asks the reporter.", + "length": 20 + }, + { + "text": "00, in rent per month.", + "length": 22 + }, + { + "text": "'Weren’t you scared?", + "length": 22 + }, + { + "text": "SCROLL DOWN FOR VIDEO: .", + "length": 24 + }, + { + "text": "12:41 EST, 25 September 2013 .", + "length": 30 + }, + { + "text": "03:43 EST, 25 September 2013 .", + "length": 30 + }, + { + "text": "Janet Tappin Coelho In Rio De Janeiro .", + "length": 39 + }, + { + "text": "Each family pays 300 reais, around £83.", + "length": 40 + }, + { + "text": "'No' says the child with a complete air of innocence.", + "length": 53 + }, + { + "text": "road down immediately to ensure the safety of the public and the child'.", + "length": 72 + }, + { + "text": "A reporter tracked down the family of five to a nearby run down property.", + "length": 73 + }, + { + "text": "Dicing with death: Fearless boy punches the air as he rides alongside cars .", + "length": 76 + }, + { + "text": "On Friday afternoon, journalists found the mother but she declined to comment.", + "length": 78 + }, + { + "text": "Risky: The Elevado Paulo de Frontin expressway is one of the busiest roads in Rio .", + "length": 83 + }, + { + "text": "Toy rider: The boy seems completely unfazed as he 
sets off on his death defying ride .", + "length": 86 + }, + { + "text": "As he veers precariously into the middle of the road, several cars come around the bend.", + "length": 88 + }, + { + "text": "Totally unfazed, the boy looks around at the stunned driver he is sharing the road with.", + "length": 88 + }, + { + "text": "They said: 'Nobody reported the incident to us, so we were unaware of what was happening.", + "length": 89 + }, + { + "text": "Rider in red: The boy is said to be known to authorities in Rio as a local trouble causer .", + "length": 91 + }, + { + "text": "It’s situated close to the motorway and opposite the Rio City hall building in the centre.", + "length": 92 + }, + { + "text": "No fear: The boy insisted he was not scared when reporters caught up with him after his stunt .", + "length": 95 + }, + { + "text": "Hard shoulder: At one point the boy is almost forced onto the central wall of the busy highway .", + "length": 96 + }, + { + "text": "Final lap: He eventually calls it a day after being narrowly missed by a number of speeding cars .", + "length": 98 + }, + { + "text": "The mother, father and three children were said by neighbours to be living in a one bedroom space.", + "length": 98 + }, + { + "text": "Fortunately, the driver spots the tricycle up ahead and swerves into the outside lane to avoid a collision.", + "length": 107 + }, + { + "text": "Pit stop: The junior joy rider stops close to the edge of the lane before rejoining the road for a second time .", + "length": 112 + }, + { + "text": "He even pretends to change gears by twisting the handlebars and hunches over as if revving his make-believe engine.", + "length": 115 + }, + { + "text": "Freewheeling: The boy, said to be from a local slum, kicked his legs out and sped down the hill of the busy motorway .", + "length": 118 + }, + { + "text": "The premises which has home walls inside, dividing the area up into separate dwellings, houses a total of twenty families.", + "length": 122 + 
}, + { + "text": "During his dangerous venture onto the carriageway, no driver stops to help the boy who is on a notoriously busy stretch of road.", + "length": 128 + }, + { + "text": "This time he hugs the side of the road and that may have saved his life as a grey car speeds round the bend, just yards behind the boy.", + "length": 135 + }, + { + "text": "By yesterday, the family had disappeared as the authorities, who had been alerted to the incident, turned up to take the children into care.", + "length": 140 + }, + { + "text": "The unnamed youngster was filmed careering down the Elevado Paulo de Frontin expressway which runs through downtown Rio, on his three-wheeler..", + "length": 143 + }, + { + "text": "Luckily, the road is unusually quiet as he gains composure and the fearless boy even takes the time to remove his right shoe before it falls off.", + "length": 145 + }, + { + "text": "Although they were not shown in the video, the boy was accompanied by two older brothers who had also participated in the dangerous motorway game.", + "length": 146 + }, + { + "text": "A local TV reporter who filmed the escapade gave the boy a post race interview when they found him a few minutes later sitting on his trusty tricycle.", + "length": 150 + }, + { + "text": "The stunt looks like it could go disastrously wrong for a moment when the toy-rider wobbles precariously around for a few seconds as he tries to gain control.", + "length": 158 + }, + { + "text": "An eight-year-old Brazilian boy showed he intends to live life in the fast lane after deciding to head out on one of the busiest motorways in Rio de Janeiro...", + "length": 159 + }, + { + "text": "The bypass is also riddled with security cameras operated by Rio city council, but they were oblivious to the incident until a local TV station showed them the footage.", + "length": 168 + }, + { + "text": "In the incredible clip, filmed on Friday, the youngster starts off at the top of a slope and gathers speed by pushing 
off with his left leg then jumping on the tricycle.", + "length": 169 + }, + { + "text": "The city's municipal guards, who are responsible for road safety, issued a statement this week apologising for their failure to respond appropriately to the incident on Friday.", + "length": 176 + }, + { + "text": "Rio city council revealed the brood was already known to social services and the children had been taken into care before following previous concerns raised about their welfare.", + "length": 177 + }, + { + "text": "Despite being one bump in the road away from certain peril, the fearless junior even has the audacity to give the alarmed motorists a thumbs up before grabbing the handlebars again.", + "length": 181 + }, + { + "text": "As the boy speeds down the slope of the two-lane carriageway into Avenida Presidente Vargas with his legs splayed out either side of the trike cars zoom past him on the outside lane.", + "length": 182 + }, + { + "text": "Not satisfied with the one lap, to the astonishment of surrounding drivers, the youngster then drags his bike from the hard shoulder and gets back into the inside lane for another go.", + "length": 183 + }, + { + "text": "As the boy reaches this the end of his ride he lifts his arms in celebration as if having triumphed in a grand prix, but his moment of glory is abruptly brought to an end by a motorist who blasts their horn at the pint sized daredevil.", + "length": 235 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5006404668092728 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:14.281551761Z", + "first_section_created": "2025-12-23T09:36:14.281980678Z", + "last_section_published": "2025-12-23T09:36:14.282252689Z", + "all_results_received": "2025-12-23T09:36:14.363886083Z", + "output_generated": "2025-12-23T09:36:14.364099092Z", + "total_processing_time_ms": 82, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 81, + "aggregation_wait_time_ms": 
0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:14.281980678Z", + "publish_time": "2025-12-23T09:36:14.282181786Z", + "first_worker_start": "2025-12-23T09:36:14.283044221Z", + "last_worker_end": "2025-12-23T09:36:14.362961Z", + "total_journey_time_ms": 80, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:14.28300712Z", + "start_time": "2025-12-23T09:36:14.283071622Z", + "end_time": "2025-12-23T09:36:14.283159526Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:14.283266Z", + "start_time": "2025-12-23T09:36:14.28342Z", + "end_time": "2025-12-23T09:36:14.362961Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 79 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:14.282972418Z", + "start_time": "2025-12-23T09:36:14.283044221Z", + "end_time": "2025-12-23T09:36:14.283140525Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:14.283060322Z", + "start_time": "2025-12-23T09:36:14.283103823Z", + "end_time": "2025-12-23T09:36:14.283145625Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:14.282209287Z", + "publish_time": "2025-12-23T09:36:14.282252689Z", + "first_worker_start": "2025-12-23T09:36:14.283095523Z", + "last_worker_end": "2025-12-23T09:36:14.347221Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:14.283239329Z", + "start_time": "2025-12-23T09:36:14.28327663Z", + "end_time": "2025-12-23T09:36:14.283281531Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:14.283961Z", + "start_time": 
"2025-12-23T09:36:14.28408Z", + "end_time": "2025-12-23T09:36:14.347221Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:14.283125024Z", + "start_time": "2025-12-23T09:36:14.283150525Z", + "end_time": "2025-12-23T09:36:14.283153625Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:14.283050621Z", + "start_time": "2025-12-23T09:36:14.283095523Z", + "end_time": "2025-12-23T09:36:14.283098123Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 142, + "min_processing_ms": 63, + "max_processing_ms": 79, + "avg_processing_ms": 71, + "median_processing_ms": 79, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2534, + "slowest_section_id": 0, + "slowest_section_time_ms": 80 + } +} diff --git a/data/output/00719cbc72803002a6a5d83e5f260dbe1f52990d.json b/data/output/00719cbc72803002a6a5d83e5f260dbe1f52990d.json new file mode 100644 
index 0000000..d3e61d1 --- /dev/null +++ b/data/output/00719cbc72803002a6a5d83e5f260dbe1f52990d.json @@ -0,0 +1,246 @@ +{ + "file_name": "00719cbc72803002a6a5d83e5f260dbe1f52990d.txt", + "total_words": 671, + "top_n_words": [ + { + "word": "the", + "count": 45 + }, + { + "word": "of", + "count": 24 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "in", + "count": 16 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "is", + "count": 10 + }, + { + "word": "by", + "count": 9 + }, + { + "word": "would", + "count": 8 + }, + { + "word": "be", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "How it works .", + "length": 14 + }, + { + "text": "Connecting cities .", + "length": 19 + }, + { + "text": "Perhaps in the future, the high-rise superstructures could also help revolutionize the way we travel.", + "length": 101 + }, + { + "text": "\"Governments will be able to take advantage of such spaces in order to re-adapt the cities' structure to society needs,\" he adds.", + "length": 129 + }, + { + "text": "But, you might wonder by now, how could commuters stay on their feet whilst the train slides in hyper speeds along the huge tower's façade?", + "length": 142 + }, + { + "text": "The main idea is that instead of traveling on normal rails beneath, the carriages would be supported by magnetic tracks running up the skyscraper's exterior.", + "length": 157 + }, + { + "text": "The hope is to connect a new hyper-speed network of underground tunnels and overground routes where superfast trains would cover distances of 300 miles in 30 minutes.", + "length": 166 + }, + { + "text": "\"In 60 years' time, it will be very difficult for governments to find attractive pieces of available land for public use in the heart of megacities,\" says Christophi, 27.", + "length": 170 + }, + { + "text": "The designer's vertical station concept calls for a tall cylindrical skyscraper whose small footprint would allow the 
transformation of the surrounding area into an urban park.", + "length": 176 + }, + { + "text": "(CNN) -- For decades, skyscrapers have served as iconic symbols of national pride or flashy trophies of corporate wealth, reshaping the skyline of the world's major urban centers.", + "length": 179 + }, + { + "text": "The designers say the towers, which would be capped off by a rooftop green plaza, are envisioned as individual pieces of infrastructure that could be replicated in cities around the world.", + "length": 188 + }, + { + "text": "Yet, like in most futuristic transport proposals, practical details are best to take a back seat for now to allow us to enjoy the thrilling ride -- that is, unless you're afraid of heights.", + "length": 189 + }, + { + "text": "Passengers arriving at the tower would use a lift to make their way up into the platform and from there into their carriage, which could accommodate 10 people sat in two rows opposite each other.", + "length": 195 + }, + { + "text": "Towering above the crowded streets of future metropolises, these giant buildings are designed to minimize the large slices of real estate that major railway terminals occupy by flipping them on their side.", + "length": 205 + }, + { + "text": "This, they claim, would not only save commuting time and simplify the way public transport is being used, but would also help to cut down CO2 emissions by replacing ways of transport powered by fossil fuels.", + "length": 207 + }, + { + "text": "The radical proposal won the designers an Honorable Mention at this year's eVolo Skyscraper competition, which encouraged people from around the world to propose new ideas for vertical structures of the future.", + "length": 210 + }, + { + "text": "The goal, designers Christopher Christophi and Lucas Mazarrasa say, is to free up valuable space in the densely-packed cities of tomorrow, which will be significantly challenged by overcrowding and a sharp drop in public space availability.", + "length": 
240 + }, + { + "text": "\" Of course, there are a number of limitations to the project -- the proposal deals only with stations designed to accommodate city by city travel, not to mention efficiency issues around loading trains in high volume terminals and connecting train routes.", + "length": 256 + }, + { + "text": "That, at least, is the fanciful concept behind the Vertical Hyper-Speed Train Hub, a futuristic proposal of two UK-based architects envisaging trains roaring up and down the side of specially-designed skyscrapers nearly as high as the Empire State Building.", + "length": 257 + }, + { + "text": "\"The Maglev trains currently travel at 360 miles per hour -- this technology by the 2075 will in no doubt move leaps and bounds from what it already is today, making the hyper-speed trains probably the fastest and safest way of transporting goods and people.", + "length": 258 + }, + { + "text": "\"Our conceptual design is based upon utilizing existing and viable technologies that can currently be seen in hyper speed rail networks, for example in China,\" says Mazarrasa, 29, adding that is a matter of time before we're able to reach the rail speeds their concept requires.", + "length": 278 + }, + { + "text": "Each carriage proportion is designed as a cubical shape to enable it to function both vertically, when docked, and horizontally, while traveling After the train's departure, the wagons would pivot like a \"Ferris wheel,\" allowing commuters to remain in an upright position and enjoy breathtaking views of the city.", + "length": 313 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.3813313841819763 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:14.783013696Z", + "first_section_created": "2025-12-23T09:36:14.783433213Z", + "last_section_published": "2025-12-23T09:36:14.783679023Z", + "all_results_received": "2025-12-23T09:36:14.849477178Z", + "output_generated": "2025-12-23T09:36:14.849656986Z", + 
"total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:14.783433213Z", + "publish_time": "2025-12-23T09:36:14.783679023Z", + "first_worker_start": "2025-12-23T09:36:14.784234046Z", + "last_worker_end": "2025-12-23T09:36:14.848557Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:14.784275947Z", + "start_time": "2025-12-23T09:36:14.78434205Z", + "end_time": "2025-12-23T09:36:14.784420453Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:14.784542Z", + "start_time": "2025-12-23T09:36:14.784719Z", + "end_time": "2025-12-23T09:36:14.848557Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:14.784165343Z", + "start_time": "2025-12-23T09:36:14.784234046Z", + "end_time": "2025-12-23T09:36:14.784313949Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:14.784217145Z", + "start_time": "2025-12-23T09:36:14.784300248Z", + "end_time": "2025-12-23T09:36:14.784321749Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 1, + 
"avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4069, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/0071bd9843284def7e727c7113a63a52199c3c1b.json b/data/output/0071bd9843284def7e727c7113a63a52199c3c1b.json new file mode 100644 index 0000000..8a2cc3d --- /dev/null +++ b/data/output/0071bd9843284def7e727c7113a63a52199c3c1b.json @@ -0,0 +1,270 @@ +{ + "file_name": "0071bd9843284def7e727c7113a63a52199c3c1b.txt", + "total_words": 507, + "top_n_words": [ + { + "word": "the", + "count": 33 + }, + { + "word": "in", + "count": 17 + }, + { + "word": "miceli", + "count": 16 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "was", + "count": 11 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "on", + "count": 9 + }, + { + "word": "orleans", + "count": 9 + }, + { + "word": "prison", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "She is seen here as a young woman .", + "length": 35 + }, + { + "text": "The agreement was signed on October 1.", + "length": 38 + }, + { + "text": "Miceli then died in hospital on January 6, 2009.", + "length": 48 + }, + { + "text": "She stopped breathing and never regained consciousness .", + "length": 56 + }, + { + "text": "However Gusman has not admitted any wrongdoing in the case .", + "length": 60 + }, + { + "text": "She never regained consciousness and was put on life support.", + "length": 61 + 
}, + { + "text": "Miceli was accused of biting the ankles of a hospital security guard.", + "length": 69 + }, + { + "text": "Cayne Miceli, 43, of New Orleans, died in hospital on January 6, 2009.", + "length": 70 + }, + { + "text": "By the time an ambulance arrived Miceli had been without a pulse for 25 minutes.", + "length": 80 + }, + { + "text": "However Gusman's office has not admitted to any wrongdoing as part of the settlement.", + "length": 85 + }, + { + "text": "However, her death is not officially counted in the tally because she died in hospital.", + "length": 87 + }, + { + "text": "Miceli is one of 44 inmates to die in the last nine years at the Orleans Parish Prison.", + "length": 87 + }, + { + "text": "Four hours later she was found to be not breathing, according to The New Orleans Times-Picayune.", + "length": 96 + }, + { + "text": "However the DA's office determined there was no criminal wrongdoing by staff at the Orleans Parish Prison.", + "length": 107 + }, + { + "text": "Scene: Some 44 inmates - not including Miceli - have died at the Orleans Parish Prison in the last nine years .", + "length": 112 + }, + { + "text": "With a history of chronic asthma, panic attacks and depression, Miceli was put in five-point restraints on a bed.", + "length": 113 + }, + { + "text": "The prison knew she had been hospital for breathing difficulties, however left Miceli restrained, the lawsuit claimed.", + "length": 118 + }, + { + "text": "Paying up: Sheriff Marlin Gusman agreed to the sum to end the wrongful death lawsuit brought by the family of Cayne Miceli.", + "length": 123 + }, + { + "text": "Death in custody: Cayne Miceli, 43, died after being placed in five-point restraints in Orleans Parish Prison in January 2009.", + "length": 127 + }, + { + "text": "She was then booked on municipal charges of disturbing the peace, resisting arrest and battery on an officer and taken to the jail.", + "length": 131 + }, + { + "text": "The federal lawsuit claimed that jail 
staff didn't provide adequate medical care to Miceli and caused her death with the restraints.", + "length": 132 + }, + { + "text": "The Miceli family lawsuit was one of at least six filed against the office by relatives of dead inmates, The Times-Picayune reported.", + "length": 133 + }, + { + "text": "The family of a woman who died in a New Orleans jail after being tied to a bed for four hours have settled their lawsuit for $600,000.", + "length": 134 + }, + { + "text": "The wrongful death lawsuit brought by her family in 2010 said Miceli voluntarily admitted herself to hospital after suffering a severe asthma attack.", + "length": 149 + }, + { + "text": "The prison's sheriff, Marlin Gusman, has now agreed to settle for $600,000 in three separate payments, the last of which is due on September 15, 2016.", + "length": 150 + }, + { + "text": "Cayne Miceli, 43, was booked into Orleans Parish Prison on January 4, 2009, for allegedly biting a security guard during at the Tulane Medical Center earlier that day.", + "length": 167 + }, + { + "text": "Controversial: The five-point restraint system, pictured here, has been removed from many prisons, but was still in practice at Orleans Parish Prison when Miceli was booked in .", + "length": 178 + }, + { + "text": "However when doctors tried to discharge her several hours later, Miceli, who insisted she needed more treatment, panicked and fought with hospital personnel, the lawsuit said, according to The Times-Picayune.", + "length": 208 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7322291731834412 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:15.284472532Z", + "first_section_created": "2025-12-23T09:36:15.285813086Z", + "last_section_published": "2025-12-23T09:36:15.285964492Z", + "all_results_received": "2025-12-23T09:36:15.349315549Z", + "output_generated": "2025-12-23T09:36:15.349463954Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 1, 
+ "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:15.285813086Z", + "publish_time": "2025-12-23T09:36:15.285964492Z", + "first_worker_start": "2025-12-23T09:36:15.286437811Z", + "last_worker_end": "2025-12-23T09:36:15.348414Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:15.286464712Z", + "start_time": "2025-12-23T09:36:15.286534715Z", + "end_time": "2025-12-23T09:36:15.286603918Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:15.286705Z", + "start_time": "2025-12-23T09:36:15.286836Z", + "end_time": "2025-12-23T09:36:15.348414Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:15.28641391Z", + "start_time": "2025-12-23T09:36:15.286488513Z", + "end_time": "2025-12-23T09:36:15.286566416Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:15.286390809Z", + "start_time": "2025-12-23T09:36:15.286437811Z", + "end_time": "2025-12-23T09:36:15.286471313Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + 
"total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2981, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/0071c3032bb2555ad11df4662be90094a643cb09.json b/data/output/0071c3032bb2555ad11df4662be90094a643cb09.json new file mode 100644 index 0000000..c0a465e --- /dev/null +++ b/data/output/0071c3032bb2555ad11df4662be90094a643cb09.json @@ -0,0 +1,230 @@ +{ + "file_name": "0071c3032bb2555ad11df4662be90094a643cb09.txt", + "total_words": 419, + "top_n_words": [ + { + "word": "the", + "count": 26 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "brown", + "count": 14 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "court", + "count": 12 + }, + { + "word": "s", + "count": 11 + }, + { + "word": "his", + "count": 9 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "for", + "count": 8 + }, + { + "word": "was", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "Then, she was a witness for the prosecution.", + "length": 44 + }, + { + "text": "Rihanna shows support in court for Chris Brown .", + "length": 48 + }, + { + "text": "\" Rihanna and Chris Brown's relationship through the years .", + "length": 60 + }, + { + "text": "\"And I don't mean just false, it is fraudulent,\" Geragos said.", + "length": 62 + }, + { + "text": "Brown wasn't in town on some of the dates reported, the motion said.", + "length": 68 + }, + { + "text": "When the pop star sat behind Brown in court at that hearing, it was Rihanna's second time in a courtroom 
with him.", + "length": 114 + }, + { + "text": "Brown and his mother were in court for Friday's hearing, but Rihanna, who attended his last court date, was not there.", + "length": 118 + }, + { + "text": "The first was the day in August 2009 when Brown was sentenced to five years' probation and ordered to stay away from her.", + "length": 121 + }, + { + "text": "The Los Angeles County district attorney's office has \"tortured\" Brown during his probation more than any client he's ever had, Geragos said.", + "length": 141 + }, + { + "text": "She is asking the judge to order him to restart his 1,400 hours of community service under the supervision of a Los Angeles probation officer.", + "length": 142 + }, + { + "text": "The judge emerged from his chambers to order Brown to come back on June 10 because lawyers need more time to look at \"additional discovery\" in his case.", + "length": 152 + }, + { + "text": "Despite the serious allegations outlined in the court filing, the prosecutor is not asking for Brown's probation to be revoked and the singer sent to jail.", + "length": 155 + }, + { + "text": "While not much happened in Friday's hearing before Los Angeles County Superior Court Judge James Brandlin, ultimately it could be big trouble for the singer.", + "length": 157 + }, + { + "text": "At the probation court date, when Geragos was asked why Brown's assault victim was in court, he replied, \"She thinks it's utterly ridiculous what they're doing to him.", + "length": 167 + }, + { + "text": "The paperwork Brown submitted to show he had completed community labor is \"at best sloppy documentation and at worst fraudulent reporting,\" District Attorney Jackie Lacey said.", + "length": 176 + }, + { + "text": "Los Angeles (CNN) -- Chris Brown sat alone in court for 35 minutes on Friday while his lawyer talked with the judge and prosecutor behind closed doors in his probation violation case.", + "length": 183 + }, + { + "text": "In a court filing in February, prosecutors 
accused Brown of not completing the 180 days of community labor ordered when he pleaded guilty to a felony assault charge in the beating of his girlfriend Rihanna.", + "length": 206 + }, + { + "text": "Mark Geragos, Brown's attorney, said after the last hearing that the prosecutor's filing was so fraudulent that he would ask the judge to punish the deputy district attorneys involved and call for a contempt of court hearing for filing false documents with the court.", + "length": 267 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5193307995796204 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:15.7867316Z", + "first_section_created": "2025-12-23T09:36:15.78699991Z", + "last_section_published": "2025-12-23T09:36:15.787163617Z", + "all_results_received": "2025-12-23T09:36:15.847657758Z", + "output_generated": "2025-12-23T09:36:15.847832865Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:15.78699991Z", + "publish_time": "2025-12-23T09:36:15.787163617Z", + "first_worker_start": "2025-12-23T09:36:15.787623136Z", + "last_worker_end": "2025-12-23T09:36:15.846774Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:15.787658737Z", + "start_time": "2025-12-23T09:36:15.787710539Z", + "end_time": "2025-12-23T09:36:15.787759441Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:15.787896Z", + "start_time": "2025-12-23T09:36:15.788029Z", + "end_time": "2025-12-23T09:36:15.846774Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:15.787650337Z", + "start_time": 
"2025-12-23T09:36:15.787711539Z", + "end_time": "2025-12-23T09:36:15.787790642Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:15.787569333Z", + "start_time": "2025-12-23T09:36:15.787623136Z", + "end_time": "2025-12-23T09:36:15.787653537Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2397, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git a/data/output/00724c0377fc4af3032c3d9bed712ed283917d3c.json b/data/output/00724c0377fc4af3032c3d9bed712ed283917d3c.json new file mode 100644 index 0000000..e33726d --- /dev/null +++ b/data/output/00724c0377fc4af3032c3d9bed712ed283917d3c.json @@ -0,0 +1,340 @@ +{ + "file_name": "00724c0377fc4af3032c3d9bed712ed283917d3c.txt", + "total_words": 1057, + "top_n_words": [ + { + "word": 
"the", + "count": 55 + }, + { + "word": "of", + "count": 48 + }, + { + "word": "to", + "count": 36 + }, + { + "word": "and", + "count": 29 + }, + { + "word": "in", + "count": 28 + }, + { + "word": "a", + "count": 24 + }, + { + "word": "ahmadinejad", + "count": 24 + }, + { + "word": "that", + "count": 15 + }, + { + "word": "his", + "count": 13 + }, + { + "word": "with", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "Mr.", + "length": 3 + }, + { + "text": "\"Mr.", + "length": 4 + }, + { + "text": "The language of this Mr.", + "length": 24 + }, + { + "text": "Why do you just talk to foreign journalists?", + "length": 44 + }, + { + "text": "Why do you run away from Iranian journalists?", + "length": 45 + }, + { + "text": "That sense of frustration is not limited to Iranians.", + "length": 53 + }, + { + "text": "Ahmadinejad why don't you talk to Iranian journalists?", + "length": 54 + }, + { + "text": "The opinions expressed in this commentary are solely those of Hamid Dabashi.", + "length": 76 + }, + { + "text": "But still the balance of the result tipped heavily in favor of Ahmadinejad's rhetorical one-upmanship.", + "length": 102 + }, + { + "text": "President only I understand,\" while the producer is baffled by the thick moustache that \"Larry King\" has suddenly grown.", + "length": 120 + }, + { + "text": "But in this case, President Obama had an opportunity during his subsequent interview with Kalbasi to respond to Ahmadinejad.", + "length": 124 + }, + { + "text": "The other factor is the language barrier between Ahmadinejad and his interviewers, which he strategically uses to his advantage.", + "length": 128 + }, + { + "text": "Jon Leyne, the distinguished senior BBC correspondent has written a wonderful essay discussing the difficulties of interviewing Ahmadinejad.", + "length": 140 + }, + { + "text": "These are problems that American journalism as an institution faces as it tries to cope with and cover a far more globalized planet than we've ever 
seen before.", + "length": 160 + }, + { + "text": "Still, too many of Ahmadinejad's statements went unchallenged last week --particularly those that had to do with the vast array of atrocities in his own country.", + "length": 161 + }, + { + "text": "These are not problems that can be solved by handing to journalists a list of questions to ask a head of state with just too many skeletons in his closet to count.", + "length": 163 + }, + { + "text": "Sporting his thick moustache and holding a list of tough questions in hand, the grandfather is charging out of the closet yelling at a CNN producer, \"Get out of my way!", + "length": 168 + }, + { + "text": "\" But that is not the modus operandi of a journalistic culture that is conceptually geared towards geopolitics and \"international\" politics rather than domestic matters.", + "length": 169 + }, + { + "text": "In response to Amanpour's question, Ahmadinejad point blank said that this report is false and Ashtiani has not been condemned to death by stoning -- which was a plain lie.", + "length": 172 + }, + { + "text": "Ahmadinejad always wins in these encounters because he points to other atrocities by redirecting the question at the questioner, and there are plenty of atrocities around the globe.", + "length": 181 + }, + { + "text": "Leyne's young colleague, Bahman Kalbasi of BBC Persian has now become a Facebook phenomenon because he accosted Ahmadinejad in a hallway at the UN and shouted a succession of questions at him: \"Mr.", + "length": 197 + }, + { + "text": "Leyne points out how Ahmadinejad succeeds \"in moving the agenda onto a ground of his own choosing, and few, if any, of the Western journalists who have interviewed him have scored many points off him.", + "length": 200 + }, + { + "text": "Slavin has suggested that \"reporters need to be armed with in-depth knowledge of Iran's economy, politics and society -- and even then, they may have difficulty getting Ahmadinejad to admit the truth.", + "length": 200 
+ }, + { + "text": "To be sure, Amanpour did ask Ahmadinejad about executions increasing fourfold since he took office, as well as about the Iranian regime taking action against opposition leaders, including raiding their offices.", + "length": 210 + }, + { + "text": "Ahmadinejad's technique,\" Leyne points out \"is aided by the fact that most of the foreign interviews are carried out in translation -- leaving the journalist less scope for jumping in, and less time to cross-examine.", + "length": 216 + }, + { + "text": "The point of the cartoon is a deep and pervasive sense of frustration that Iranians all over the world have with the inability of prominent American journalists and talk show hosts to handle the slippery Ahmadinejad.", + "length": 216 + }, + { + "text": "\" Ahmadinejad left his real surprise for after all his interviews, when during his official address to the General Assembly he effectively accused the United States government of direct involvement in the atrocities of 9/11.", + "length": 224 + }, + { + "text": "Christiane Amanpour, Charlie Rose, and Larry King in particular are being criticized for providing Ahmadinejad with a global forum to say whatever nonsense he wishes without enough of a serious challenge to his statements -- some of which are flat-out lies.", + "length": 257 + }, + { + "text": "\" Perhaps the best example of how Ahmadinejad manages to slip away from hard questions is when Christiane Amanpour asked him about the case of Sakineh Mohammadi Ashtiani, a woman charged with murder and adultery and originally condemned to death by stoning.", + "length": 257 + }, + { + "text": "\" The former USA Today correspondent Barbara Slavin has also written an article, \"How not to get played by Ahmadinejad,\" in which she too testifies that the \"Iranian president has perfected the art of slipping and sliding around even the most seasoned interviewers.", + "length": 265 + }, + { + "text": "And in Larry King's case, after interviewing 
Ahmadinejad he had a follow-up conversation with Fareed Zakaria, the host of CNN's \"Fareed Zakaria GPS,\" in which the evasive answers of Ahmadinejad were put in proper context with more detailed attention to the internal atrocities in Iran.", + "length": 285 + }, + { + "text": "\"For him to make the statement here in Manhattan,\" President Obama said, \"just a little north of Ground Zero, where families lost their loved ones, people of all faiths, all ethnicities who see this as the seminal tragedy of this generation, for him to make a statement like that was inexcusable,\" Obama said.", + "length": 309 + }, + { + "text": "In anticipation of Ahmadinejad's trip to New York, the International Campaign for Human Rights in Iran had in fact prepared a full preparatory list of atrocities perpetrated under the administration of Ahmadinejad's for American journalists -- with key facts and crucial issues that they might raise when interviewing him.", + "length": 322 + }, + { + "text": "New York (CNN) -- In a recent piece by prominent Iranian cartoonist Mana Neyestani, we see one of his favorite characters -- a cantankerous grandfather who along with his two grandchildren is a solid supporter of the Green Movement against the regime in Iran -- having managed to tie up Larry King inside a closet and trying to disguise himself as the world renowned talk show host in order to get to interview Mahmoud Ahmadinejad.", + "length": 431 + }, + { + "text": "Since the massively contested presidential election of June 2009, scores of peaceful demonstrators have been arbitrarily arrested, tortured, and murdered; prominent human and women's rights activists, reformists, and labor union leaders have been arrested and subjected to Stalinist show trials and given long and punishing prison terms; the leaders of the opposition Green Movement have been systematically harassed and intimidated; the universities have gone through yet another round of ideological purges; yet another cultural 
revolution to silence and suppress non-conformist ideas is well under way; an entire cadre of independent-minded journalists have been forced into the indignity of exile -- and yet few of these atrocities manages to gain much attention in the conversations that these prominent American journalists have with Ahmadinejad.", + "length": 852 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6022711396217346 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:16.287439805Z", + "first_section_created": "2025-12-23T09:36:16.287858322Z", + "last_section_published": "2025-12-23T09:36:16.288381843Z", + "all_results_received": "2025-12-23T09:36:16.375335952Z", + "output_generated": "2025-12-23T09:36:16.376155485Z", + "total_processing_time_ms": 88, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 86, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:16.287858322Z", + "publish_time": "2025-12-23T09:36:16.288182735Z", + "first_worker_start": "2025-12-23T09:36:16.288613952Z", + "last_worker_end": "2025-12-23T09:36:16.371319Z", + "total_journey_time_ms": 83, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:16.288730457Z", + "start_time": "2025-12-23T09:36:16.28880506Z", + "end_time": "2025-12-23T09:36:16.288922965Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:16.288839Z", + "start_time": "2025-12-23T09:36:16.288966Z", + "end_time": "2025-12-23T09:36:16.371319Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 82 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:16.288533849Z", + "start_time": "2025-12-23T09:36:16.288613952Z", + "end_time": "2025-12-23T09:36:16.288703156Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + 
"worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:16.288637953Z", + "start_time": "2025-12-23T09:36:16.288721857Z", + "end_time": "2025-12-23T09:36:16.288768858Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:16.288267138Z", + "publish_time": "2025-12-23T09:36:16.288381843Z", + "first_worker_start": "2025-12-23T09:36:16.288767058Z", + "last_worker_end": "2025-12-23T09:36:16.374657Z", + "total_journey_time_ms": 86, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:16.28881736Z", + "start_time": "2025-12-23T09:36:16.288854162Z", + "end_time": "2025-12-23T09:36:16.288888563Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:16.28905Z", + "start_time": "2025-12-23T09:36:16.289164Z", + "end_time": "2025-12-23T09:36:16.374657Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 85 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:16.28881626Z", + "start_time": "2025-12-23T09:36:16.288854762Z", + "end_time": "2025-12-23T09:36:16.288903264Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:16.288709056Z", + "start_time": "2025-12-23T09:36:16.288767058Z", + "end_time": "2025-12-23T09:36:16.288786159Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 167, + "min_processing_ms": 82, + "max_processing_ms": 85, + 
"avg_processing_ms": 83, + "median_processing_ms": 85, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3281, + "slowest_section_id": 1, + "slowest_section_time_ms": 86 + } +} diff --git a/data/output/0072512376f8112d9ff3cfffc4c9efed58264865.json b/data/output/0072512376f8112d9ff3cfffc4c9efed58264865.json new file mode 100644 index 0000000..03c5782 --- /dev/null +++ b/data/output/0072512376f8112d9ff3cfffc4c9efed58264865.json @@ -0,0 +1,242 @@ +{ + "file_name": "0072512376f8112d9ff3cfffc4c9efed58264865.txt", + "total_words": 496, + "top_n_words": [ + { + "word": "the", + "count": 24 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "foods", + "count": 17 + }, + { + "word": "whole", + "count": 17 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "at", + "count": 12 + }, + { + "word": "prices", + "count": 11 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "of", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "Whole Foods reported a rise of 3.", + "length": 33 + }, + { + "text": "Mariano's and Wegmans groceries also rung in less than Whole Foods, by about 30-40 dollars.", + "length": 91 + }, + { + "text": "9 per cent, which is minimal compared to the sales increases of 8 per cent seen in the past.", + "length": 92 + }, + { + 
"text": "Expensive: Prices at Whole Foods in almost every case of the study were higher by a wide margin .", + "length": 97 + }, + { + "text": "This above graphs shows a comparison of prices at Whole Foods with regional grocery chains throughout the U.", + "length": 108 + }, + { + "text": "Prices were lowest at HEB where the final total came to about 220, much less than the more than 250 spent at Whole Foods.", + "length": 121 + }, + { + "text": "And now the chain is getting desperate to turn out more business, since their stock prices have fallen 40 per cent since late October 2013.", + "length": 139 + }, + { + "text": "Bring them back: Whole Foods is reportedly instituting a loyalty rewards program and national marketing campaign to drum up more business .", + "length": 139 + }, + { + "text": "Prices are seemingly becoming a problem for the national grocery chain, which saw only a small increase in same store sales for the last quarter.", + "length": 145 + }, + { + "text": "Prices at HEB and Mariano's remained considerably lower than Whole Foods, but in this sample Wegmans ended up being the more expensive store by a small margin.", + "length": 159 + }, + { + "text": "Prices at Texas grocery chain HEB were the lowest in each comparison, with prices lower than Whole Foods by a margin of about 30 dollars in the larger sruvey .", + "length": 159 + }, + { + "text": "Wegmans, a grocery chain based in the Mid-Atlantic region, with outposts in Washington, DC, actually had higher prices than Whole Foods in one of the surveys .", + "length": 159 + }, + { + "text": "Chicago chain Mariano's had consistently lower prices than Whole Foods in both studies as well, despite being a supermarket with similarly organic and natural products .", + "length": 169 + }, + { + "text": "In another smaller survey, Wolfe bought between 11 and 17 organic or conventional produce items at the three regional grocery stores and compared them again to Whole Foods.", + "length": 172 + }, + { + 
"text": "As a response, Whole Foods will making a few changes to increase business including starting a loyalty rewards program and launching a national marketing campaign for the first time.", + "length": 182 + }, + { + "text": "It's no secret that Whole Foods isn't the most cost-friendly of grocery stores, but a new survey shows just how much more customers pay at the natural food retailer compared to other regional supermarkets.", + "length": 205 + }, + { + "text": "In one instance, Wolfe totaled 60-72 items at Whole Foods and then compared them to prices for the same exact products at Houston's HEB, a Mariano's grocery store in Chicago, and a Wegmans market in Washington, DC.", + "length": 214 + }, + { + "text": "New York investment firm Wolfe Research recently did a study, comparing baskets of groceries at the upscale organic market to other stores across the nation and confirmed that tags at Whole Foods are through the roof.", + "length": 217 + }, + { + "text": "'The emergence of the multi-pronged competitive attack on Whole Foods by generally lower-priced competitors suggests to us that Whole Foods’ business model faces a very real, clear and present danger,' Wolfe analysts wrote last week.", + "length": 235 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.6137027144432068 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:16.78914685Z", + "first_section_created": "2025-12-23T09:36:16.79061501Z", + "last_section_published": "2025-12-23T09:36:16.790787916Z", + "all_results_received": "2025-12-23T09:36:16.85208189Z", + "output_generated": "2025-12-23T09:36:16.852241196Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:16.79061501Z", + "publish_time": "2025-12-23T09:36:16.790787916Z", + "first_worker_start": 
"2025-12-23T09:36:16.791301537Z", + "last_worker_end": "2025-12-23T09:36:16.851174Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:16.791256635Z", + "start_time": "2025-12-23T09:36:16.791312638Z", + "end_time": "2025-12-23T09:36:16.79136714Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:16.791456Z", + "start_time": "2025-12-23T09:36:16.791602Z", + "end_time": "2025-12-23T09:36:16.851174Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:16.791302137Z", + "start_time": "2025-12-23T09:36:16.79136664Z", + "end_time": "2025-12-23T09:36:16.791437243Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:16.791241735Z", + "start_time": "2025-12-23T09:36:16.791301537Z", + "end_time": "2025-12-23T09:36:16.791332938Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + 
"worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2879, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/0072733a68b164630221341584ce2e235c5ee547.json b/data/output/0072733a68b164630221341584ce2e235c5ee547.json new file mode 100644 index 0000000..fe89c91 --- /dev/null +++ b/data/output/0072733a68b164630221341584ce2e235c5ee547.json @@ -0,0 +1,516 @@ +{ + "file_name": "0072733a68b164630221341584ce2e235c5ee547.txt", + "total_words": 1322, + "top_n_words": [ + { + "word": "the", + "count": 55 + }, + { + "word": "to", + "count": 38 + }, + { + "word": "and", + "count": 36 + }, + { + "word": "a", + "count": 35 + }, + { + "word": "she", + "count": 33 + }, + { + "word": "her", + "count": 32 + }, + { + "word": "copeland", + "count": 24 + }, + { + "word": "s", + "count": 23 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "in", + "count": 19 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "'She then looked at us.", + "length": 23 + }, + { + "text": "I am absolutely amazed.", + "length": 23 + }, + { + "text": "11:08 EST, 18 May 2012 .", + "length": 24 + }, + { + "text": "23:08 EST, 18 May 2012 .", + "length": 24 + }, + { + "text": "'It's very close to home.", + "length": 25 + }, + { + "text": "Aimee nodded, her father said.", + "length": 30 + }, + { + "text": "'She didn't draw back in horror.", + "length": 32 + }, + { + "text": "'Her spirits are extraordinarily high.", + "length": 38 + }, + { + "text": "going to still be on dialysis for a while.", + 
"length": 42 + }, + { + "text": "'And it turned out she was allergic to soy.", + "length": 43 + }, + { + "text": "She knew the condition she was in,' he wrote.", + "length": 45 + }, + { + "text": "'She's going to be here for months,' he said.", + "length": 45 + }, + { + "text": "She's going to need to learn to use prosthetics.", + "length": 48 + }, + { + "text": "'She's going to need to regrow skin that was removed.", + "length": 53 + }, + { + "text": "Organ failure and shock are also common complications.", + "length": 54 + }, + { + "text": "The affliction can destroy muscle, fat and skin tissue.", + "length": 55 + }, + { + "text": "' He recalled how Aimee's eyes widened, her jaw dropped.", + "length": 56 + }, + { + "text": "He said she 'shed no tears, she never batted an eyelash.", + "length": 56 + }, + { + "text": "We all understood her next three words - \"Let's do this\".", + "length": 57 + }, + { + "text": "Only a week ago, doctors gave her little chance of survival.", + "length": 60 + }, + { + "text": "Nadine Kaslow, chief psychologist at Grady Hospital in Atlanta.", + "length": 63 + }, + { + "text": "'We explained that she had become a symbol of hope, love and faith.", + "length": 67 + }, + { + "text": "Andy Copeland said his daughter had sought treatment for a skin rash.", + "length": 69 + }, + { + "text": "Then, Copeland took his daughter's hands and held them up to her face.", + "length": 70 + }, + { + "text": "It was not immediately clear whether the surgeries had already happened.", + "length": 72 + }, + { + "text": "It makes you realize anything could happen at any time,' Miss Dermo said.", + "length": 73 + }, + { + "text": "Doctors had to amputate most of Miss Copeland's left leg to save her life.", + "length": 74 + }, + { + "text": "'She was a vegetarian and she had a soy-based diet,' Copeland's father said.", + "length": 76 + }, + { + "text": "Because it is so virulent, the bacteria spreads rapidly throughout the body.", + "length": 76 + }, + { + 
"text": "But they learned today that they would have to amputate that and her fingers.", + "length": 77 + }, + { + "text": "Amputation can become necessary if the disease spreads through an arm or leg.", + "length": 77 + }, + { + "text": "He said his daughter seems aware that she's in the hospital after an accident.", + "length": 78 + }, + { + "text": "'We told her that the world loved and admired her,' he wrote in Friday's update.", + "length": 80 + }, + { + "text": "'She smiled and raised her hands up, carefully examining them,' her father wrote.", + "length": 81 + }, + { + "text": "Dermo said Copeland's illness has been weighing on the minds of people on campus.", + "length": 81 + }, + { + "text": "' Cherished: Miss Copeland, a 24-year-old graduate from Georgia, has fun with friends .", + "length": 87 + }, + { + "text": "But in recent days she has been alert and bored enough to have asked for a book to read.", + "length": 88 + }, + { + "text": "Some students walked to the blood drive, while others drove from other parts of the state.", + "length": 90 + }, + { + "text": "Aimee Copeland's father recounted the conversation in an update on his Facebook page today.", + "length": 91 + }, + { + "text": "The disease develops when the bacteria enters the body, often through a minor cut or scrape.", + "length": 92 + }, + { + "text": "Even the slightest amount of good news has been enough to raise the hopes of fellow students.", + "length": 93 + }, + { + "text": "As the bacteria multiply, they release toxins that kill tissue and cut off blood flow to the area.", + "length": 98 + }, + { + "text": "He explained that doctors believed her hands were hampering her progress, and they must be removed.", + "length": 99 + }, + { + "text": "I was crying because I am a proud father of an incredibly courageous young lady', Andy Copeland wrote.", + "length": 102 + }, + { + "text": "Doctors initially feared it might be a symptom of lupus, but tests revealed Copeland had a food 
allergy.", + "length": 104 + }, + { + "text": "The flesh-eating bacteria, Aeromonas hydrophila, emit toxins that cut off blood flow to parts of the body.", + "length": 106 + }, + { + "text": "Kara Dermo, a chemistry student who worked with Copeland at the Sunnyside Cafe, was one of the first in line.", + "length": 109 + }, + { + "text": "Losing a limb is extremely difficult emotionally, and can be particularly difficult for young people, said Dr.", + "length": 110 + }, + { + "text": "'This doctor can't fathom a reason for why she's improved the way she has,' Mr Copeland said earlier this week.", + "length": 111 + }, + { + "text": "She was among the friends invited to try the zip line that sent Copeland plunging to the rocky river, but declined.", + "length": 115 + }, + { + "text": "'There is a process that they go through, a grief process,' said Kaslow, who is not involved in Miss Copeland's care.", + "length": 117 + }, + { + "text": "'Necrotizing' refers to something that causes body tissue to die, and the infection can destroy skin, muscles and fat.", + "length": 118 + }, + { + "text": "Symptoms include small, red lumps or bumps on the skin, rapidly-spreading bruising, sweating, chills, fever and nausea.", + "length": 119 + }, + { + "text": "Patients may undergo skin grafts after the infection has cleared up, to help the healing process or for aesthetic reasons.", + "length": 122 + }, + { + "text": "On May 4, she was diagnosed with the rare infection and flown 200 miles to Augusta for treatment by specialists at Doctors Hospital.", + "length": 132 + }, + { + "text": "Sufferers must be treated immediately to prevent death, and are usually given powerful antibiotics and surgery to remove dead tissue.", + "length": 133 + }, + { + "text": "He said the family also wanted to make sure a hospital counselor was available to help Copeland once she is informed of her condition.", + "length": 134 + }, + { + "text": "'Let's do this': Aimee Copeland did not cry when her 
father told her she would have to have her remaining foot and fingers amputated .", + "length": 134 + }, + { + "text": "The 24-year-old student from an Atlanta suburb remains in a critical condition as she battles an infection called necrotizing fasciitis.", + "length": 136 + }, + { + "text": "But for now, they're sparing her the details of her condition until after she has been removed from a respirator and is breathing on her own.", + "length": 141 + }, + { + "text": "Distraught: Paige Copeland, center, cries as her parents Donna and Andy Copeland speak about their daughter Aimee after she had her leg amputated .", + "length": 147 + }, + { + "text": "' Traumatic: The 24-year-old student from an Atlanta suburb remains in a critical condition as she battles an infection called necrotizing fasciitis .", + "length": 150 + }, + { + "text": "A young Georgia woman fighting a flesh-eating bacteria has learned she will lose her hands and remaining foot, and responded by saying 'Let's do this'.", + "length": 151 + }, + { + "text": "Copeland suffered a deep gash in her leg after falling from a homemade zip line on May 1 over a Georgia river, and the bacteria took hold in the wound.", + "length": 151 + }, + { + "text": "Copeland's father said she faces a long recovery not just from her amputation but also from kidney failure and other organ damage caused by the infection.", + "length": 154 + }, + { + "text": "Andy Copeland wrote about the difficult talk he had a day earlier with his daughter after the 24-year-old woman contracted the bacteria after an accident.", + "length": 154 + }, + { + "text": "'There is shock, disbelief, anger, sadness and then a period of reconciling one to the situation and healing and figuring out how they are going to move forward in their life.", + "length": 175 + }, + { + "text": "Copeland said he was finally able to tell his daughter on Thursday what had happened since that outing, and how she's been the focus of an outpouring of love from around 
the world.", + "length": 180 + }, + { + "text": "' More than 50 people lined up in the first hour for a blood drive earlier this week at the gymnasium at the University of West Georgia, where Copeland attends school 200 miles from the Augusta hospital.", + "length": 203 + }, + { + "text": "Doctors hoped they would be able to save her remaining foot after two days of treatment using a hyperbaric chamber, in which patients breathe pure oxygen to boost white blood cells and accelerate healing.", + "length": 204 + }, + { + "text": "She has been unable to speak because of her breathing tube in he throat, but her parents said they've learned to read lips and are now able to communicate with their daughter, who was also asking for ice-cream.", + "length": 210 + }, + { + "text": "Fun loving: Aimee's father, Andy, says describes her as 'outdoorsy' and a 'lover of people' Necrotizing fasciitis, more commonly known as 'flesh-eating disease', is a rare but extremely vicious bacterial infection.", + "length": 214 + }, + { + "text": "' Prayers: Aimee Copeland's father has said the progress she has made after contracting the aggressive virus was a 'miracle' Doctors at the local emergency room in Carrollton closed the wound she suffered in the zip line accident with nearly two dozen staples, but it became infected within days.", + "length": 296 + }, + { + "text": "' Much-loved: Aimee's friends and family are praying for her survival and eventual recovery (from left - Aimee, mother Donna, father Andy and sister Paige) Some news reports have said Miss Copeland was recently diagnosed with lupus, a chronic disease that compromises the immune system, but her father said that was not true.", + "length": 325 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5714447647333145 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:17.291844636Z", + "first_section_created": "2025-12-23T09:36:17.2934264Z", + "last_section_published": 
"2025-12-23T09:36:17.294004823Z", + "all_results_received": "2025-12-23T09:36:17.387878211Z", + "output_generated": "2025-12-23T09:36:17.388215625Z", + "total_processing_time_ms": 96, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 93, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:17.2934264Z", + "publish_time": "2025-12-23T09:36:17.293646208Z", + "first_worker_start": "2025-12-23T09:36:17.294071726Z", + "last_worker_end": "2025-12-23T09:36:17.386782Z", + "total_journey_time_ms": 93, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:17.294145029Z", + "start_time": "2025-12-23T09:36:17.294273434Z", + "end_time": "2025-12-23T09:36:17.29443884Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:17.294336Z", + "start_time": "2025-12-23T09:36:17.294475Z", + "end_time": "2025-12-23T09:36:17.386782Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 92 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:17.294463141Z", + "start_time": "2025-12-23T09:36:17.294734252Z", + "end_time": "2025-12-23T09:36:17.294860457Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:17.294010723Z", + "start_time": "2025-12-23T09:36:17.294071726Z", + "end_time": "2025-12-23T09:36:17.294111227Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:17.293827716Z", + "publish_time": "2025-12-23T09:36:17.294004823Z", + "first_worker_start": "2025-12-23T09:36:17.294337336Z", + "last_worker_end": "2025-12-23T09:36:17.386827Z", + "total_journey_time_ms": 92, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": 
"2025-12-23T09:36:17.294351137Z", + "start_time": "2025-12-23T09:36:17.294399139Z", + "end_time": "2025-12-23T09:36:17.294456441Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:17.294702Z", + "start_time": "2025-12-23T09:36:17.294857Z", + "end_time": "2025-12-23T09:36:17.386827Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 91 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:17.294274134Z", + "start_time": "2025-12-23T09:36:17.294341236Z", + "end_time": "2025-12-23T09:36:17.29444074Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:17.294292434Z", + "start_time": "2025-12-23T09:36:17.294337336Z", + "end_time": "2025-12-23T09:36:17.294371638Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 183, + "min_processing_ms": 91, + "max_processing_ms": 92, + "avg_processing_ms": 91, + "median_processing_ms": 92, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3785, + "slowest_section_id": 0, + "slowest_section_time_ms": 93 + } +} diff --git a/data/output/00731254dc5162758968c46e573d7b0725166a1a.json b/data/output/00731254dc5162758968c46e573d7b0725166a1a.json new file mode 100644 index 0000000..ba97f3f --- /dev/null +++ b/data/output/00731254dc5162758968c46e573d7b0725166a1a.json @@ -0,0 +1,270 @@ +{ + "file_name": "00731254dc5162758968c46e573d7b0725166a1a.txt", + "total_words": 601, + "top_n_words": [ + { + "word": "the", + "count": 25 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "he", + "count": 13 + }, + { + "word": "his", + "count": 13 + }, + { + "word": "was", + "count": 13 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "manchester", + "count": 10 + }, + { + "word": "for", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "'His face is burned and he is not very well,' she said.", + "length": 55 + }, + { + "text": "They've all been very good to me, they've helped pull me through it all.", + "length": 72 + }, + { + "text": "'We just want whoever did this caught so that Callum can put it behind him.", + "length": 75 + }, + { + "text": "He had spent the day selling poppies and was wearing his camouflage uniform.", + "length": 76 + }, + { + "text": "' 'It is pure luck that he did not sustain more serious burns to his face and body.", + "length": 83 + }, + { + "text": "Callum suffered minor burns to his face and singed hairs on his face and right forearm.", + "length": 87 + }, + { + "text": "Undeterred: Aerosol attack victim Callum Watkins, 15, laid a wreath for Remembrance Sunday .", + "length": 92 + }, + { + "text": "He said afterwards: 'Most of my mates were surprised that I still carried on selling the poppies.", + "length": 97 + }, + { + "text": "His family has even 
been invited to afternoon tea with the Lord Mayor of Manchester Susan Cooley.", + "length": 97 + }, + { + "text": "Parade: The teenager joined friends from the Duke of Lancaster's Cadet Regiment for the Manchester service .", + "length": 108 + }, + { + "text": "This is the 15-year-old poppy seller who refused to quit after being attacked with a makeshift flamethrower.", + "length": 108 + }, + { + "text": "The tribute by Callum was one of thousands by veterans and their families in Manchester city centre yesterday.", + "length": 110 + }, + { + "text": "'Understandably, both he and his family are in a state of total shock and cannot believe someone would do this.", + "length": 111 + }, + { + "text": "'We need to find who ever is responsible for this crime and I would therefore appeal to anyone who has information.", + "length": 115 + }, + { + "text": "His mother Kirsty Sloan said last week he was 'very upset' by the incident and confirmed he was now recovering at home.", + "length": 119 + }, + { + "text": "The schoolboy was left with minor burns after a stranger lit an aerosol spray and blasted it towards his face last week.", + "length": 120 + }, + { + "text": "Undeterred, he vowed to carry on selling poppies for the Royal British Legion - and said he wants to train as an army paramedic.", + "length": 128 + }, + { + "text": "Callum laid his wreath with friends from the Duke of Lancaster's Cadet Regiment during yesterday's Remembrance Sunday parade in Manchester.", + "length": 139 + }, + { + "text": "' At yesterday's service she said the family had been inundated with well-wishers nationwide and an invitation for Callum to meet RAF officials.", + "length": 144 + }, + { + "text": "' Callum's attacker, who appeared to be under the influence of alcohol, was described as black or Asian, 5ft 8in tall and wearing a dark hooded top.", + "length": 148 + }, + { + "text": "Callum Watkins laid a wreath for Remembrance Sunday in his full cadet's uniform just days after he was 
attacked at a bus stop in Manchester city centre.", + "length": 152 + }, + { + "text": "' The teenager was waiting for a bus home at 6pm on November 1 when he was attacked outside Manchester Art Gallery, less than 100 yards from the city's cenotaph.", + "length": 161 + }, + { + "text": "Sir Richard Leese, leader of Manchester city council, said: 'The centenary certainly isn't necessarily something to celebrate, but it is something that we should remember.", + "length": 171 + }, + { + "text": "A service was led by Bishop of Manchester David Walker and faith leaders on the 100th anniversary of the First World War, which killed 23,792 soldiers from Greater Manchester.", + "length": 175 + }, + { + "text": "Detective Inspector Liam Boden of Greater Manchester Police said last week it was an 'appalling attack on a young man who was raising money to help remember all those who gave their lives'.", + "length": 189 + }, + { + "text": "'Given the initial description we have of the offender, it may be that he was under the influence of something but whatever his motivation, his violent actions could have scarred this young man for life.", + "length": 203 + }, + { + "text": "The 15-year-old said: 'Most of my mates were surprised that I still carried on selling the poppies' He added: 'At this stage of our inquiries, we're keeping an open mind as to what motivated the offender to commit such an act.", + "length": 226 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.839992105960846 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:17.795037341Z", + "first_section_created": "2025-12-23T09:36:17.797176727Z", + "last_section_published": "2025-12-23T09:36:17.797368535Z", + "all_results_received": "2025-12-23T09:36:17.865802797Z", + "output_generated": "2025-12-23T09:36:17.865982604Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + 
"section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:17.797176727Z", + "publish_time": "2025-12-23T09:36:17.797368535Z", + "first_worker_start": "2025-12-23T09:36:17.79797576Z", + "last_worker_end": "2025-12-23T09:36:17.864845Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:17.797919457Z", + "start_time": "2025-12-23T09:36:17.79798556Z", + "end_time": "2025-12-23T09:36:17.798043862Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:17.798172Z", + "start_time": "2025-12-23T09:36:17.798315Z", + "end_time": "2025-12-23T09:36:17.864845Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:17.797897257Z", + "start_time": "2025-12-23T09:36:17.79797576Z", + "end_time": "2025-12-23T09:36:17.798043462Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:17.797906157Z", + "start_time": "2025-12-23T09:36:17.797998361Z", + "end_time": "2025-12-23T09:36:17.798030762Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 
0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3408, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/007381430b63f56f0ac60e08f7d85b0e20e14c47.json b/data/output/007381430b63f56f0ac60e08f7d85b0e20e14c47.json new file mode 100644 index 0000000..1c23259 --- /dev/null +++ b/data/output/007381430b63f56f0ac60e08f7d85b0e20e14c47.json @@ -0,0 +1,314 @@ +{ + "file_name": "007381430b63f56f0ac60e08f7d85b0e20e14c47.txt", + "total_words": 460, + "top_n_words": [ + { + "word": "the", + "count": 20 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "his", + "count": 14 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "was", + "count": 10 + }, + { + "word": "soldier", + "count": 9 + }, + { + "word": "german", + "count": 8 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "gordon", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "dead.", + "length": 5 + }, + { + "text": "The family .", + "length": 12 + }, + { + "text": "in France with the U.", + "length": 21 + }, + { + "text": "combing through war records.", + "length": 28 + }, + { + "text": "If the tests are positive the .", + "length": 31 + }, + { + "text": "killed and wounded with each day.", + "length": 33 + }, + { + "text": "Now six decades after his death, .", + "length": 34 + }, + { + "text": "the list of logistical priorities.", + "length": 34 + }, + { + "text": "They think that his remains were .", + "length": 34 + }, + { + "text": "remains will be returned to 
Canada.", + "length": 35 + }, + { + "text": "Canadian soldier Lawrence S Gordon .", + "length": 36 + }, + { + "text": "Food, guns, ammunition and medical .", + "length": 36 + }, + { + "text": "Like hundreds of British Tommies and .", + "length": 38 + }, + { + "text": "When his body was removed his dog-tag .", + "length": 39 + }, + { + "text": "the bones are those of Lawrence S Gordon.", + "length": 41 + }, + { + "text": "believed to be an enemy soldier by the Allies.", + "length": 46 + }, + { + "text": "Army, found himself without a jacket having lost .", + "length": 50 + }, + { + "text": "Last week, they were present along with Jed Henry, whose .", + "length": 58 + }, + { + "text": "his own during fighting at the Battle of Normandy 69 years ago.", + "length": 63 + }, + { + "text": "decades what ever had become of him and whether he was really alive or .", + "length": 72 + }, + { + "text": "now hope that by Christmas, DNA comparison tests will establish whether .", + "length": 73 + }, + { + "text": "eventually placed in a cask and interred at a German cemetery at Heines .", + "length": 73 + }, + { + "text": "supplies were paramount as hundreds of soldiers on both sides were being .", + "length": 74 + }, + { + "text": "American GIs, Lawrence S Gordon, a Canadian who had volunteered to serve .", + "length": 74 + }, + { + "text": "The German war memorial at Mont-de-Huisnes, Huisnes-sur-Mer, Normandy, France .", + "length": 79 + }, + { + "text": "Sur Mer in Normandy along with the bones of 12,000 other soldiers, many unidentified.", + "length": 85 + }, + { + "text": "An allied soldier confronts a German soldier during the Normandy Landings on June 6, 1944.", + "length": 90 + }, + { + "text": "Lawrence Gordon's nephews Lawrence and Sam believe they have traced his body after painstakingly .", + "length": 98 + }, + { + "text": "His family believe that because Gordon was wearing a German jacket, a mistake was made and he was .", + "length": 99 + }, + { + "text": "Allied 
troops advance on a beach during the invasion of the Allies in Normandy, France on June 6, 1944.", + "length": 103 + }, + { + "text": "identification was missing but a bloodstained wallet found in his pocket helped to work out who he was.", + "length": 103 + }, + { + "text": "Little did he know that his 'borrowing' of a German battledress top would lead his family to wonder for six .", + "length": 109 + }, + { + "text": "During the chaos in France following the D-day invasion of Normandy in 1944, supplies of uniforms were low down on .", + "length": 116 + }, + { + "text": "grandfather served with soldier Gordon, at the cemetery when the remains of the 'unknown German soldier' were exhumed.", + "length": 118 + }, + { + "text": "It was eventually sent back to his family in Canada with a letter explaining that his wallet had been found but not his body.", + "length": 125 + }, + { + "text": "On August 13, 1944, Private First Class Gordon was killed in Normandy with another soldier in the turret of a machine gun-armored car.", + "length": 134 + }, + { + "text": "It is likely that he helped himself to a German military jacket from a pile of garments taken from prisoners of war who had surrendered.", + "length": 136 + }, + { + "text": "A family of a Canadian soldier killed during the Second World War believe that they may finally have found his remains after he was mistakenly buried as an unknown soldier in a German cemetery.", + "length": 193 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5506508350372314 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:18.298119142Z", + "first_section_created": "2025-12-23T09:36:18.298443155Z", + "last_section_published": "2025-12-23T09:36:18.298629063Z", + "all_results_received": "2025-12-23T09:36:18.359564822Z", + "output_generated": "2025-12-23T09:36:18.359717828Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 60, + 
"aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:18.298443155Z", + "publish_time": "2025-12-23T09:36:18.298629063Z", + "first_worker_start": "2025-12-23T09:36:18.299269588Z", + "last_worker_end": "2025-12-23T09:36:18.358696Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:18.299205986Z", + "start_time": "2025-12-23T09:36:18.299269588Z", + "end_time": "2025-12-23T09:36:18.29930749Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:18.299419Z", + "start_time": "2025-12-23T09:36:18.299566Z", + "end_time": "2025-12-23T09:36:18.358696Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:18.299218086Z", + "start_time": "2025-12-23T09:36:18.299275389Z", + "end_time": "2025-12-23T09:36:18.299341291Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:18.299265988Z", + "start_time": "2025-12-23T09:36:18.299320491Z", + "end_time": "2025-12-23T09:36:18.299340491Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2667, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/0073d06343b3afe2fc6d52713003cae8d8f555df.json b/data/output/0073d06343b3afe2fc6d52713003cae8d8f555df.json new file mode 100644 index 0000000..4309628 --- /dev/null +++ b/data/output/0073d06343b3afe2fc6d52713003cae8d8f555df.json @@ -0,0 +1,298 @@ +{ + "file_name": "0073d06343b3afe2fc6d52713003cae8d8f555df.txt", + "total_words": 714, + "top_n_words": [ + { + "word": "the", + "count": 40 + }, + { + "word": "a", + "count": 29 + }, + { + "word": "to", + "count": 25 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "and", + "count": 14 + }, + { + "word": "for", + "count": 10 + }, + { + "word": "has", + "count": 10 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "water", + "count": 10 + }, + { + "word": "fabric", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "This means they won't irritate the skin.", + "length": 40 + }, + { + "text": "‘We decided to scrap the idea and look for the perfect alternative.", + "length": 69 + }, + { + "text": "Water based liquids will form a 150 degree sphere and roll right off!", + "length": 69 + }, + { + "text": "As a result, this barrier protects your shirt from potential accidents.", + "length": 71 + }, + { + "text": "‘This is because the fabric is layered with billions of silica particles.", + "length": 75 + }, + { + "text": "Water-based liquids form a 
150-degree sphere on this material and roll off .", + "length": 76 + }, + { + "text": "He then began looking at ways to incorporate the technology into the fabric.", + "length": 76 + }, + { + "text": "If the Kickstarter campaign is a success, Mr Shaw expects to begin shipping in July.", + "length": 84 + }, + { + "text": "He created a fabric that has the nanotechnology bonded to the fibres on a microscopic level.", + "length": 92 + }, + { + "text": "The range comes in a variety of different colours and patterns, with trunks starting from $47 (£28).", + "length": 101 + }, + { + "text": "’ What Mr Shaw and his team came up with was a polyester-blend hydrophobic nanomaterial technology.", + "length": 101 + }, + { + "text": "When water-based liquids hit the surface of this material they form a 150-degree sphere and roll off.", + "length": 101 + }, + { + "text": "The technology works by bonding billions of nanoparticles to individual fibres on a microscopic level.", + "length": 102 + }, + { + "text": "When water-based liquids hit the surface of this material they form a 150-degree sphere and roll off .", + "length": 102 + }, + { + "text": "Hydrophobic nanotech clothing has been around for a number of years, but it always has a limited lifespan.", + "length": 106 + }, + { + "text": "The technology is thought to work by bonding billions of nanoparticles to individual fibres on a microscopic level.", + "length": 115 + }, + { + "text": "Despite this, the T-shirts are said to feel ‘no different to any other items that could be found in a clothing shop.", + "length": 118 + }, + { + "text": "It looks like ordinary swimwear from the outside, but when it is covered in liquid, the garment instantly repels water.", + "length": 119 + }, + { + "text": "For instance, the Silic water-repelling T-shirt, was released last year but can only maintain its properties for 80 washes.", + "length": 123 + }, + { + "text": "The ‘Frank Anthony’ swimwear, however, promises that its water-repellent 
abilities last for the last-time of the shorts.", + "length": 124 + }, + { + "text": "The clothing is made from polyester, which has been infused with a combination of chemicals that make it resistant to water.", + "length": 124 + }, + { + "text": "The tops can resist any spills and splashes including Coca-Cola, tomato ketchup, mustard, milkshakes, beer, ink and even red wine.", + "length": 130 + }, + { + "text": "A quick dip in the pool can sound appealing - until you realise you’ll be left wearing dripping swimwear for the rest of the day.", + "length": 131 + }, + { + "text": "’ University student Patel from San Francisco made a prototype using a spray-on chemical, but realised it would only last for one wash.", + "length": 137 + }, + { + "text": "Now one Toronto-based entrepreneur has come up with a range of swim trunks that means you can avoid leaving a trail of puddles in your wake.", + "length": 140 + }, + { + "text": "Inventor, Franky Shaw, has set-up a Kickstarter fund for a swimwear range which is made from hydrophobic material - a fabric that repels water.", + "length": 143 + }, + { + "text": "’ The material in the Silic shirt, pictured, created by San Fransisco-based student Aamir Patel, has billions of silica particles bonded to the fibres.", + "length": 153 + }, + { + "text": "‘This fabric has proven to drastically reduce dry-times by up to 95 per cent in contrast to regular 100 per cent polyester swim shorts,’ the company claims.", + "length": 160 + }, + { + "text": "According to Patel: ‘Most liquid molecules will not be able to touch the fabric because of a microscopic layer of air that forms between the liquid and fabric.", + "length": 161 + }, + { + "text": "‘This fabric has proven to drastically reduce dry-times by up to 95 per cent in contrast to regular 100 per cent polyester swim shorts,’ the company claims .", + "length": 161 + }, + { + "text": "‘We decided to look at different topical applications for use but shortly found out they changed 
the texture of the fabric and had no way of being used on garments.", + "length": 166 + }, + { + "text": "‘We were tired of having to change shorts every time you leave the beach, having car seats soaked and not being able to go from the beach to a restaurant,’ Mr Shaw said.", + "length": 173 + }, + { + "text": "If you're clumsy and constantly spilling food down yourself, or just lazy and don't like washing your clothes, a student has invented the answer to your problems - a T-shirt that is impossible to stain.", + "length": 202 + }, + { + "text": "Created by Toronto-based entrepreneur, Franky Shaw, the swim trunks are made from a unique polyester-based hydrophobic material with prices starting from $47 (£28) It looks like ordinary swimwear, but when covered in liquid, the ‘Frank Anthony’ trunks instantly repels water .", + "length": 281 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.40532931685447693 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:18.799404971Z", + "first_section_created": "2025-12-23T09:36:18.800854329Z", + "last_section_published": "2025-12-23T09:36:18.801091439Z", + "all_results_received": "2025-12-23T09:36:18.865550938Z", + "output_generated": "2025-12-23T09:36:18.865734745Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:18.800854329Z", + "publish_time": "2025-12-23T09:36:18.801091439Z", + "first_worker_start": "2025-12-23T09:36:18.801597459Z", + "last_worker_end": "2025-12-23T09:36:18.86471Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:18.801556757Z", + "start_time": "2025-12-23T09:36:18.801597459Z", + "end_time": "2025-12-23T09:36:18.801645061Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + 
"sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:18.801824Z", + "start_time": "2025-12-23T09:36:18.801972Z", + "end_time": "2025-12-23T09:36:18.86471Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:18.801581658Z", + "start_time": "2025-12-23T09:36:18.801648961Z", + "end_time": "2025-12-23T09:36:18.801772266Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:18.801533756Z", + "start_time": "2025-12-23T09:36:18.801605459Z", + "end_time": "2025-12-23T09:36:18.801638761Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4180, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git 
a/data/output/0074140a4fc4e6300ea946e79ee7b2ddf65d4a2b.json b/data/output/0074140a4fc4e6300ea946e79ee7b2ddf65d4a2b.json new file mode 100644 index 0000000..300bbef --- /dev/null +++ b/data/output/0074140a4fc4e6300ea946e79ee7b2ddf65d4a2b.json @@ -0,0 +1,242 @@ +{ + "file_name": "0074140a4fc4e6300ea946e79ee7b2ddf65d4a2b.txt", + "total_words": 431, + "top_n_words": [ + { + "word": "the", + "count": 22 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "half", + "count": 7 + }, + { + "word": "his", + "count": 7 + }, + { + "word": "of", + "count": 7 + }, + { + "word": "we", + "count": 7 + }, + { + "word": "arsenal", + "count": 6 + }, + { + "word": "at", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "Some players were not at their best.", + "length": 36 + }, + { + "text": "'We did not have enough possession in the game.", + "length": 47 + }, + { + "text": "He said: 'We had problems to get our flow going.", + "length": 48 + }, + { + "text": "' Wenger, too, was unhappy with his side's defending throughout the game.", + "length": 73 + }, + { + "text": "'When you don't score goal number three you are under threat to concede a second.", + "length": 81 + }, + { + "text": "'To concede the corner that was a bit unlucky but then we didn't jump with Skrtel.", + "length": 82 + }, + { + "text": "VIDEO Scroll down to see how last season's Anfield nightmare came back to haunt Gunners .", + "length": 89 + }, + { + "text": "He added: 'Overall I think once they were down to 10 men they were not dangerous any more.", + "length": 90 + }, + { + "text": "Liverpool boss Brendan Rodgers and Arsenal manager Wenger watch on from the touchline on Sunday .", + "length": 97 + }, + { + "text": "Arsene Wenger was frustrated with some of his players' performances in their 2-2 draw at Liverpool .", + "length": 100 + }, + { + "text": "Gunners right-back Mathieu Debuchy celebrates scoring Arsenal's equaliser 
on the stroke of half-time .", + "length": 102 + }, + { + "text": "Olivier Giroud looked to have given Arsenal all three points after his precise finish in the second half .", + "length": 106 + }, + { + "text": "Brazil international Philippe Coutinho is congratulated by his team-mates after giving Liverpool the lead .", + "length": 107 + }, + { + "text": "In the first half it was down to tactical and some psychological reasons: maybe bad memories from last year.", + "length": 108 + }, + { + "text": "Liverpool centre back Martin Skrtel celebrates after scoring a dramatic injury-time equaliser against Arsenal .", + "length": 111 + }, + { + "text": "After the game Gunners boss Wenger admitted his side played with too much anxiety throughout, sitting off their opposition far too much as a result.", + "length": 148 + }, + { + "text": "'I felt we played with the handbrake too much, in the first half especially, in the second half we played much better but at 2-1 we sat back too much.", + "length": 150 + }, + { + "text": "Arsenal boss Arsene Wenger felt bad memories of last season's 5-1 thrashing at the hands of Liverpool might have affected some of his players during Sunday's 2-2 draw at Anfield.", + "length": 178 + }, + { + "text": "'Overall it is a fair result but a frustrating result for us because they came back when we had plenty of defenders on the pitch and could have defended the set-piece much better.", + "length": 179 + }, + { + "text": "Brendan Rodgers' side were dominant in the opening half but both sides went into the break level at 1-1, a stark contrast to the last time the two sides met in February - when Liverpool were already leading 4-0.", + "length": 211 + }, + { + "text": "This time around Olivier Giroud looked to have given Arsenal a welcome three points with his second-half goal, but Martin Skrtel's injury-time equaliser earned the hosts a share of the spoils - leaving the Gunners four points off fourth-placed West Ham as a result.", + "length": 265 
+ } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.45133689045906067 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:19.301878704Z", + "first_section_created": "2025-12-23T09:36:19.302240218Z", + "last_section_published": "2025-12-23T09:36:19.302406825Z", + "all_results_received": "2025-12-23T09:36:19.358956001Z", + "output_generated": "2025-12-23T09:36:19.359108908Z", + "total_processing_time_ms": 57, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 56, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:19.302240218Z", + "publish_time": "2025-12-23T09:36:19.302406825Z", + "first_worker_start": "2025-12-23T09:36:19.302968148Z", + "last_worker_end": "2025-12-23T09:36:19.358151Z", + "total_journey_time_ms": 55, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:19.302958147Z", + "start_time": "2025-12-23T09:36:19.30302625Z", + "end_time": "2025-12-23T09:36:19.303087252Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:19.303124Z", + "start_time": "2025-12-23T09:36:19.303265Z", + "end_time": "2025-12-23T09:36:19.358151Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 54 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:19.302920346Z", + "start_time": "2025-12-23T09:36:19.302968148Z", + "end_time": "2025-12-23T09:36:19.303052951Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:19.302913245Z", + "start_time": "2025-12-23T09:36:19.302971648Z", + "end_time": "2025-12-23T09:36:19.302995149Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 54, + "min_processing_ms": 54, + "max_processing_ms": 54, + "avg_processing_ms": 54, + "median_processing_ms": 54, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2428, + "slowest_section_id": 0, + "slowest_section_time_ms": 55 + } +} diff --git a/data/output/007416dba45b3e8c919792b6e0cdfb2f642f2f72.json b/data/output/007416dba45b3e8c919792b6e0cdfb2f642f2f72.json new file mode 100644 index 0000000..110ace0 --- /dev/null +++ b/data/output/007416dba45b3e8c919792b6e0cdfb2f642f2f72.json @@ -0,0 +1,386 @@ +{ + "file_name": "007416dba45b3e8c919792b6e0cdfb2f642f2f72.txt", + "total_words": 777, + "top_n_words": [ + { + "word": "the", + "count": 54 + }, + { + "word": "a", + "count": 29 + }, + { + "word": "and", + "count": 22 + }, + { + "word": "to", + "count": 22 + }, + { + "word": "her", + "count": 19 + }, + { + "word": "princess", + "count": 19 + }, + { + "word": "s", + "count": 19 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "victoria", + "count": 13 + }, + { + "word": "in", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "In .", + "length": 4 + }, + { + "text": "By .", + "length": 4 + }, + { + 
"text": "The .", + "length": 5 + }, + { + "text": "It's .", + "length": 6 + }, + { + "text": "As part .", + "length": 9 + }, + { + "text": "She wore a .", + "length": 12 + }, + { + "text": "Bianca London .", + "length": 15 + }, + { + "text": "It's a washout!", + "length": 15 + }, + { + "text": "Prince Carl Philip.", + "length": 19 + }, + { + "text": "and Lucy Waterlow .", + "length": 19 + }, + { + "text": "regardless of gender.", + "length": 21 + }, + { + "text": "pastel pink cardigan.", + "length": 21 + }, + { + "text": "When it rains, it pours!", + "length": 24 + }, + { + "text": "Before the rain fell, the .", + "length": 27 + }, + { + "text": "side by blowing some himself.", + "length": 29 + }, + { + "text": "He then revealed his playful .", + "length": 30 + }, + { + "text": "The present Queen Silvia, 70, was of .", + "length": 38 + }, + { + "text": "The former personal trainer, 40, held .", + "length": 39 + }, + { + "text": "who clamored to wish her a happy birthday.", + "length": 42 + }, + { + "text": "the stick while his daughter blew bubbles.", + "length": 42 + }, + { + "text": "lemon suit with a white lace coat over the top.", + "length": 47 + }, + { + "text": "She was happy to pose for pictures with royal fans .", + "length": 52 + }, + { + "text": "course present today to celebrate her daughter's birthday.", + "length": 58 + }, + { + "text": "a national day of celebration named after her - Victoria Day.", + "length": 61 + }, + { + "text": "little girl is second in line to the throne thanks to the rules of .", + "length": 68 + }, + { + "text": "be making her royal debut at the event following her engagement to .", + "length": 68 + }, + { + "text": "of the celebrations, there's a fair, golf tournament and pop concert.", + "length": 69 + }, + { + "text": "Crown Princess looked smart but summery in a white shift dress with a .", + "length": 71 + }, + { + "text": "A wave from the birthday girl: Crown Princess Victoria turns 37 today .", + "length": 71 + }, + { 
+ "text": "the Swedish press, the news centred on the fact Sofia Hellqvist would .", + "length": 71 + }, + { + "text": "succession being changed to allow a Swedish monarch's first born to rule .", + "length": 74 + }, + { + "text": "Befitting for a princess, Victoria's birthday is never just a quiet family gathering.", + "length": 85 + }, + { + "text": "Adorable: Princess Estelle stole the show with her cute outfit and delightful smile .", + "length": 85 + }, + { + "text": "Gifts galore: The crown princess met the crowd gathered to wish her a happy birthday .", + "length": 86 + }, + { + "text": "A right royal day out: Prince Carl Philip and Sofia Hellqvist cheer as they watch the show .", + "length": 92 + }, + { + "text": "Enjoying the show: The family appeared to be enjoying their celebratory day despite a spot of rain .", + "length": 100 + }, + { + "text": "The toddler looked adorable in a floral dress with white tights and shoes and had a pretty bow in her hair.", + "length": 107 + }, + { + "text": "Still looking glam: Wellwishers who joined in the celebrations show off their dresses under clear ponchos .", + "length": 107 + }, + { + "text": "World of their own: The prince blew some bubbles himself as the pair seemed oblivious to the gathered crowd .", + "length": 109 + }, + { + "text": "Pretty as a princess: The little girl wore a floral pink dress, white tights and shoes and a bow in her hair .", + "length": 110 + }, + { + "text": "She's only two years old, but Princess Estelle of Sweden already knows how to strike a pose and charm an audience.", + "length": 114 + }, + { + "text": "While Victoria met her public, her husband Prince Daniel kept their daughter entertained with a bottle of bubbles.", + "length": 114 + }, + { + "text": "The birthday girl and heir to the throne then went to greet the gathered well-wishers who gave her flowers and presents.", + "length": 120 + }, + { + "text": "Three generations of royals: From left, Sweden's Prince Daniel, Queen 
Silvia, Princess Estelle and Crown Princess Victoria .", + "length": 124 + }, + { + "text": "Shops adorned with Swedish flags and bunting stay open for longer and people are encouraged to relax and let their hair down.", + "length": 125 + }, + { + "text": "It's also a day when Sweden's sporting achievements are celebrated and this year their Winter Olympic athletes were honoured.", + "length": 125 + }, + { + "text": "The rain poured at Victoria Day celebrations on the Crown Princess's 37th birthday at Borgholm Stadium on Oland's island in Sweden .", + "length": 132 + }, + { + "text": "Having fun: While her mother met the public, Princess Estelle's father, Prince Daniel, kept her entertained with a bottle of bubbles .", + "length": 134 + }, + { + "text": "Here they come: The little princess held on to her mother and grandmother's hands as they walked out to the courtyard to greet the public .", + "length": 139 + }, + { + "text": "The Swedish royal family didn't let a little rain dampen their parade as they gathered to celebrate Crown Princess Victoria's 37th birthday.", + "length": 140 + }, + { + "text": "This means Crown Princess Victoria will one day inherit the throne and, in turn, her first-born daughter will continue the line of succession.", + "length": 142 + }, + { + "text": "The family - and members of the public who had come to wish Victoria a happy birthday - wore their ponchos as they took in a show at the stadium.", + "length": 145 + }, + { + "text": "The cute princess stole the limelight away from her mother today as the royal family gathered to celebrate Crown Princess Victoria's 37th birthday.", + "length": 147 + }, + { + "text": "The heavens may have opened but the royal family smiled as they enjoyed a musical show at their summer residence Sollidens Palace, on the island of Oland, Sweden.", + "length": 162 + }, + { + "text": "Holding on tightly to her mother and grandmother's hands, she then delighted the gathered crowd when she gave them a beaming 
smile and clapped her hands together in glee.", + "length": 170 + }, + { + "text": "(L-R) Princess Madeleine of Sweden, Prince Daniel, Crown Princess Victoria of Sweden, King Carl Gustav XVI and Queen Silvia, wear their ponchos as the heavens open on Victoria Day .", + "length": 181 + }, + { + "text": "The show must go on: The royal family didn't let the rain ruin their fun as they celebrated Victoria Day celebrations on the Crown Princess's 37th birthday at Borgholm Stadium on Oland's island in Sweden .", + "length": 205 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.38651201128959656 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:19.803197586Z", + "first_section_created": "2025-12-23T09:36:19.803596602Z", + "last_section_published": "2025-12-23T09:36:19.803831311Z", + "all_results_received": "2025-12-23T09:36:19.865285185Z", + "output_generated": "2025-12-23T09:36:19.865459492Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:19.803596602Z", + "publish_time": "2025-12-23T09:36:19.803831311Z", + "first_worker_start": "2025-12-23T09:36:19.80428753Z", + "last_worker_end": "2025-12-23T09:36:19.864348Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:19.804234827Z", + "start_time": "2025-12-23T09:36:19.80430003Z", + "end_time": "2025-12-23T09:36:19.804391834Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:19.804475Z", + "start_time": "2025-12-23T09:36:19.804614Z", + "end_time": "2025-12-23T09:36:19.864348Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": 
"2025-12-23T09:36:19.804313231Z", + "start_time": "2025-12-23T09:36:19.804387934Z", + "end_time": "2025-12-23T09:36:19.804499738Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:19.804228527Z", + "start_time": "2025-12-23T09:36:19.80428753Z", + "end_time": "2025-12-23T09:36:19.804323931Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4487, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/00743e4563e3e77f18910a7a284b4689c1666576.json b/data/output/00743e4563e3e77f18910a7a284b4689c1666576.json new file mode 100644 index 0000000..9af95b9 --- /dev/null +++ b/data/output/00743e4563e3e77f18910a7a284b4689c1666576.json @@ -0,0 +1,318 @@ +{ + "file_name": "00743e4563e3e77f18910a7a284b4689c1666576.txt", + 
"total_words": 701, + "top_n_words": [ + { + "word": "the", + "count": 41 + }, + { + "word": "in", + "count": 20 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "a", + "count": 18 + }, + { + "word": "jordan", + "count": 17 + }, + { + "word": "his", + "count": 15 + }, + { + "word": "of", + "count": 14 + }, + { + "word": "home", + "count": 13 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "with", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "com reported.", + "length": 13 + }, + { + "text": "The NBA star paid $4.", + "length": 21 + }, + { + "text": "business insider reported.", + "length": 26 + }, + { + "text": "He also has a vacation home in Utah.", + "length": 36 + }, + { + "text": "8 million for the land, and another $7.", + "length": 39 + }, + { + "text": "6 million for construction of the home.", + "length": 39 + }, + { + "text": "making him the first ex-player to own a team.", + "length": 45 + }, + { + "text": "Mr Jordan remarried model Yvette Prieto last April.", + "length": 51 + }, + { + "text": "But now Mr Jordan is hedging his bets on an auction.", + "length": 52 + }, + { + "text": "The Jordans divorced in late 2006, after 17 years of marriage.", + "length": 62 + }, + { + "text": "The closest sale in the area was a $16million Lake Bluff home in 2007.", + "length": 70 + }, + { + "text": "The professional-caliber court also has its own entrance and parking, Forbes.", + "length": 77 + }, + { + "text": "There's also a three-bedroom guesthouse, pool area and outdoor tennis court .", + "length": 77 + }, + { + "text": "The secluded property boasts nine bedrooms, 19 bathrooms and five fireplaces.", + "length": 77 + }, + { + "text": "There is no minimum reserve, but bidders must make submit a deposit of $250,000.", + "length": 80 + }, + { + "text": "It has a sound system set up to provide perfect acoustics within the court space.", + "length": 81 + }, + { + "text": "Jordan now splits his time between his homes in North Carolina 
and Jupiter, Florida.", + "length": 84 + }, + { + "text": "Moving on: Jordan remarried this past April to model Yvette Prieto (pictured above).", + "length": 84 + }, + { + "text": "Living large: The enormous home boasts nine bedrooms, nineteen bathrooms and five fireplaces.", + "length": 93 + }, + { + "text": "The mansion has 11 bedrooms, a guard-house, a media center, and of course, a basketball court.", + "length": 94 + }, + { + "text": "Jordan originally listed the 56,000-square-foot monolith on the shores of Lake Michigan in February 2012.", + "length": 105 + }, + { + "text": "He's pictured on the right with daughter Jasmine and sons Jeffrey Michael and Marcus when they were younger .", + "length": 109 + }, + { + "text": "It took two years to build and was designed 'to Jordan’s exacting specifications,' according to the listing.", + "length": 110 + }, + { + "text": "Six time NBA champion Jordan became part-owner of the Charlotte Bobcats in 2006, and then majority owner in 2010.", + "length": 113 + }, + { + "text": "He now spends most of his time between his homes in North Carolina and Florida - as well as a vacation home in Utah .", + "length": 117 + }, + { + "text": "There's also a three-bedroom guesthouse, pool area, outdoor tennis court and three climate-controlled multi-car garages.", + "length": 120 + }, + { + "text": "In 2010 Jordan bought a mansion in 'The Bears Club', an exclusive Florida golfing community developed by golfer Jack Nicklaus.", + "length": 126 + }, + { + "text": "The privately-gated residence, where Jordan lived with his former-wife, Juanita, and their three children, is to be sold furnished.", + "length": 131 + }, + { + "text": "Win Jordan's home by Thanksgiving: In order to participate in the auction on November 22, bidders need only submit a deposit of $250,000 .", + "length": 138 + }, + { + "text": "Downsizing: Jordan said he was auctioning off the house because his three kids are grown and he doesn't need a big house in Chicago 
anymore.", + "length": 140 + }, + { + "text": "Jordan put his home of 20 years on the market for $29million originally, which ended up by the most expensive listing price in the area ever.", + "length": 141 + }, + { + "text": "The private gate is boldly embellished with a huge number 23, the jersey number Jordan famously wore during his initial tenure with the Chicago Bulls.", + "length": 150 + }, + { + "text": "Grand entrance: The gate leading up to the private home is emblazoned with the number 23, his jersey number when he played on the Chicago Bulls team .", + "length": 150 + }, + { + "text": "He told the Wall Street Journal in an email that he's selling the property because his three kids are grown now and he doesn't need a big house in Chicago.", + "length": 155 + }, + { + "text": "Failing to find any serious buyers for his decked-out Highland Park, Illinois mansion - basketball legend Michael Jordan is taking his home to the auction block.", + "length": 161 + }, + { + "text": "The home will be auctioned on November 22 by New York-based Concierge Auctions in association with listing agent Katherine Chez Malkin of Baird \u0026 Warner realtors.", + "length": 162 + }, + { + "text": "Their home: The Highland Park mansion was Jordan's home for 20 years and where he raised his three children with wife Juanita before the couple divorced in 2006 after 17 years of marriage .", + "length": 189 + }, + { + "text": "Clearing house: After failing to sell his Highland Park, Illinois mansion with the original asking price of $29million, basketball legend Michael Jordan will auction it off on November 22 .", + "length": 189 + }, + { + "text": "Unsurprisingly, the stellar feature is an indoor basketball complex featuring a full-size regulation court with specially cushioned hardwood flooring and competition-quality high intensity lighting.", + "length": 198 + }, + { + "text": "Practice space: Of course the main attraction of the home is Jordan's indoor basketball complex with 
a full-size regulation court constructed with specially-cushioned hardwood flooring and competition lighting .", + "length": 211 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4801637828350067 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:20.304911984Z", + "first_section_created": "2025-12-23T09:36:20.305248797Z", + "last_section_published": "2025-12-23T09:36:20.305516808Z", + "all_results_received": "2025-12-23T09:36:20.36667987Z", + "output_generated": "2025-12-23T09:36:20.366850977Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:20.305248797Z", + "publish_time": "2025-12-23T09:36:20.305516808Z", + "first_worker_start": "2025-12-23T09:36:20.305963826Z", + "last_worker_end": "2025-12-23T09:36:20.365755Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:20.305921024Z", + "start_time": "2025-12-23T09:36:20.306001528Z", + "end_time": "2025-12-23T09:36:20.306081731Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:20.306264Z", + "start_time": "2025-12-23T09:36:20.3064Z", + "end_time": "2025-12-23T09:36:20.365755Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:20.305904424Z", + "start_time": "2025-12-23T09:36:20.305963826Z", + "end_time": "2025-12-23T09:36:20.30605823Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:20.305952126Z", + "start_time": "2025-12-23T09:36:20.306030229Z", + "end_time": "2025-12-23T09:36:20.30607223Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4152, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/007461fca041c3a5389c7768d93194f25376f1dd.json b/data/output/007461fca041c3a5389c7768d93194f25376f1dd.json new file mode 100644 index 0000000..666afb9 --- /dev/null +++ b/data/output/007461fca041c3a5389c7768d93194f25376f1dd.json @@ -0,0 +1,242 @@ +{ + "file_name": "007461fca041c3a5389c7768d93194f25376f1dd.txt", + "total_words": 500, + "top_n_words": [ + { + "word": "and", + "count": 25 + }, + { + "word": "the", + "count": 20 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "is", + "count": 11 + }, + { + "word": "paper", + "count": 11 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "as", + "count": 10 + }, + { + "word": "dung", + "count": 9 + }, + { + "word": "elephant", 
+ "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "This is then mixed with recycled paper.", + "length": 41 + }, + { + "text": "Animal dung is boiled for up to six hours to soften it and remove bacteria.", + "length": 76 + }, + { + "text": "One company is taking it even further by utilising a variety of animals' excrement.", + "length": 83 + }, + { + "text": "Stationery companies are using a quirky new ingredient in a bid to help save the rainforest.", + "length": 92 + }, + { + "text": "This is then mixed with other non-wood fibres such as pineapple husks, hay and banana tree trunks.", + "length": 99 + }, + { + "text": "Rather than chop down trees to make paper, some brands are using elephant poo as a material instead .", + "length": 101 + }, + { + "text": "After collection, the dung is boiled for up to six hours, which softens the faeces and removes bacteria.", + "length": 104 + }, + { + "text": "These animals have highly fibrous diets of bamboo, sugar cane, grasses, banana trees, leaves, twigs and fruits.", + "length": 111 + }, + { + "text": "The elephant dung is collected and vigorously boiled with margosa leaves, which have natural disinfectant properties.", + "length": 117 + }, + { + "text": "Due to their specific digestive systems, they can't fully break down these fibres, making the dung perfect for paper making.", + "length": 124 + }, + { + "text": "This is mixed with non-wood fibres, such as pineapple husks and banana tree trunks and can be dyed to create greeting cards and faux flowers .", + "length": 143 + }, + { + "text": "The coarseness and shade of the paper is entirely dependent on the elephants’ diet, making each sheet as unique as the elephant that produced it.", + "length": 147 + }, + { + "text": "PooPooPaper uses dung from a host of different fibre-eating herbivorous animals such as elephants, cows, horses, moose, pandas and donkeys in northern Thailand.", + "length": 160 + }, + { + "text": "As gifts and souvenirs, products include 
journals, notepads, albums, frames, bags, stationery, greeting cards, calendars, diaries, key chains, magnets and jewellery.", + "length": 165 + }, + { + "text": "The final product can be coloured using non-toxic food colouring and laid out to dry in the sun before being crafted into a variety of different consumer and industrial products.", + "length": 179 + }, + { + "text": "Rather than chop down trees to make paper, some brands like The Eden Project, which makes notebooks at the Maximus Elephant Conservation Trust in Sri Lanka, are using animal poo instead.", + "length": 186 + }, + { + "text": "PooPooPaper says its products, such as boxes and keyrings, are fun, eco-friendly, inspirational and a creative and novel alternative to wood-pulp based paper products and don't harm any forests .", + "length": 195 + }, + { + "text": "The company says its products are fun, eco-friendly, inspirational and a creative and novel alternative to wood-pulp based paper products, which rely on the continual cutting of trees and harvesting of forests.", + "length": 210 + }, + { + "text": "The process of making elephant dung paper is a little different to crafting regular handmade paper as the elephant's digestive system breaks down vegetable fibres so the dung is full of ready-to-use paper pulp.", + "length": 210 + }, + { + "text": "Animals like donkeys, elephants and cows have highly fibrous diets of bamboo, sugar cane, grasses, banana trees, leaves, twigs and fruits so their dung is an ideal paper material, say the makers of this quirky stationery .", + "length": 222 + }, + { + "text": "Strange process: PooPooPaper, left, uses dung from a host of different fibre-eating herbivorous animals such as elephants, cows, horses, moose, pandas and donkeys in northern Thailand, while The Eden Project collects elephant poo and vigorously boils it with naturally disinfecting margosa leaves .", + "length": 299 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4521607458591461 + 
}, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:20.80633827Z", + "first_section_created": "2025-12-23T09:36:20.806726686Z", + "last_section_published": "2025-12-23T09:36:20.806863991Z", + "all_results_received": "2025-12-23T09:36:20.872580437Z", + "output_generated": "2025-12-23T09:36:20.872697442Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:20.806726686Z", + "publish_time": "2025-12-23T09:36:20.806863991Z", + "first_worker_start": "2025-12-23T09:36:20.807364011Z", + "last_worker_end": "2025-12-23T09:36:20.871457Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:20.807351811Z", + "start_time": "2025-12-23T09:36:20.807417714Z", + "end_time": "2025-12-23T09:36:20.807478316Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:20.807658Z", + "start_time": "2025-12-23T09:36:20.807804Z", + "end_time": "2025-12-23T09:36:20.871457Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:20.807379212Z", + "start_time": "2025-12-23T09:36:20.807445915Z", + "end_time": "2025-12-23T09:36:20.807531818Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:20.807309409Z", + "start_time": "2025-12-23T09:36:20.807364011Z", + "end_time": "2025-12-23T09:36:20.807389312Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3083, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/00746c8a53b92428bd3f948a92b206e5a07d7fbb.json b/data/output/00746c8a53b92428bd3f948a92b206e5a07d7fbb.json new file mode 100644 index 0000000..490e0bf --- /dev/null +++ b/data/output/00746c8a53b92428bd3f948a92b206e5a07d7fbb.json @@ -0,0 +1,234 @@ +{ + "file_name": "00746c8a53b92428bd3f948a92b206e5a07d7fbb.txt", + "total_words": 436, + "top_n_words": [ + { + "word": "the", + "count": 30 + }, + { + "word": "of", + "count": 19 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "exercise", + "count": 8 + }, + { + "word": "daily", + "count": 5 + }, + { + "word": "even", + "count": 5 + }, + { + "word": "on", + "count": 5 + }, + { + "word": "study", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "All of the participants were told to overeat.", + "length": 45 + }, + { + "text": "matched the energy surplus between groups, so the exercising group .", + 
"length": 68 + }, + { + "text": "consumed even more and were still better off at the end of the week.", + "length": 68 + }, + { + "text": "Research has shown a short period of indulgence can cause long-term effects on the body.", + "length": 88 + }, + { + "text": "Exercise has positive effects even when we are actively storing energy and gaining weight.", + "length": 90 + }, + { + "text": "Feast away: Scientists have found overeating will not affect blood sugar levels in people who exercise .", + "length": 104 + }, + { + "text": "Dr Dylan Thompson, senior author of the paper, published in the Journal of Physiology today, said: 'We .", + "length": 104 + }, + { + "text": "A short daily workout now could combat over-eating and days of lazing about at Christmas, scientists claim.", + "length": 107 + }, + { + "text": "Those who did not exercise increased their calorie intake by 50 per cent and the others consumed 75 per cent more.", + "length": 114 + }, + { + "text": "' Solution: Study found we can indulge in peace if we do a short daily bout of exercise in the run up to Christmas .", + "length": 116 + }, + { + "text": "'However, even though energy was still being stored, regular exercise prevented many of the long-term negative changes from taking place.", + "length": 137 + }, + { + "text": "Experts at the University of Bath found exercising during a time of excess can stabilise blood sugar levels and prevent damage to metabolism.", + "length": 141 + }, + { + "text": "In a group of 26 healthy young men, half of them only exercised daily on a treadmill for 45 minutes, while the other half remained entirely inactive.", + "length": 149 + }, + { + "text": "Dr James Betts, one of the researchers who worked on the study, added: 'This new research shows that the picture is more sophisticated than \"energy\" alone.", + "length": 155 + }, + { + "text": "Metabolism also slowed in the men that did not exercise, the study showed, as the genes in their fat cells were not adequately 
activated to process the sizable meals.", + "length": 166 + }, + { + "text": "But after one week, the non-exercising group's blood sugar levels fell into an unhealthy decline, while those doing a daily workout showed no side effects from the extra food.", + "length": 175 + }, + { + "text": "But a new study released today revealed this can be countered by 45 minutes of exercise a day in the run up to the festive season - even if the amount of calories consumed is significantly more than those burned off.", + "length": 216 + }, + { + "text": "' Dr Jean-Philippe Walhin, also from the university, added: 'Short-term overfeeding and reduced physical activity had a dramatic impact on the overall metabolic health of the participants and on various key genes within fat tissue.", + "length": 231 + }, + { + "text": "'If you are facing a period of over-consumption and inactivity this Christmas, then our study shows that a daily bout of exercise will prevent many of the negative changes in the way in which your body handles sugar, even if you do still gain weight.", + "length": 250 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5268265604972839 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:21.307511146Z", + "first_section_created": "2025-12-23T09:36:21.307868761Z", + "last_section_published": "2025-12-23T09:36:21.308061469Z", + "all_results_received": "2025-12-23T09:36:21.373326096Z", + "output_generated": "2025-12-23T09:36:21.373484302Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:21.307868761Z", + "publish_time": "2025-12-23T09:36:21.308061469Z", + "first_worker_start": "2025-12-23T09:36:21.30859579Z", + "last_worker_end": "2025-12-23T09:36:21.371774Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + 
"worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:21.308562089Z", + "start_time": "2025-12-23T09:36:21.308626591Z", + "end_time": "2025-12-23T09:36:21.308680693Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:21.30881Z", + "start_time": "2025-12-23T09:36:21.30895Z", + "end_time": "2025-12-23T09:36:21.371774Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:21.308547188Z", + "start_time": "2025-12-23T09:36:21.308616191Z", + "end_time": "2025-12-23T09:36:21.308685094Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:21.308518587Z", + "start_time": "2025-12-23T09:36:21.30859579Z", + "end_time": "2025-12-23T09:36:21.308624291Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2541, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/00746e95fef40eda4e216ca1daf5c867d700f881.json b/data/output/00746e95fef40eda4e216ca1daf5c867d700f881.json new file mode 100644 index 0000000..d62be6c --- /dev/null +++ b/data/output/00746e95fef40eda4e216ca1daf5c867d700f881.json @@ -0,0 +1,322 @@ +{ + "file_name": "00746e95fef40eda4e216ca1daf5c867d700f881.txt", + "total_words": 813, + "top_n_words": [ + { + "word": "the", + "count": 60 + }, + { + "word": "in", + "count": 25 + }, + { + "word": "a", + "count": 24 + }, + { + "word": "and", + "count": 22 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "of", + "count": 18 + }, + { + "word": "hiv", + "count": 14 + }, + { + "word": "virus", + "count": 14 + }, + { + "word": "by", + "count": 12 + }, + { + "word": "infected", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": ".", + "length": 1 + }, + { + "text": ".", + "length": 1 + }, + { + "text": "somewhere closer to home.", + "length": 25 + }, + { + "text": "' Scroll down for video .", + "length": 25 + }, + { + "text": "A conceptual view of HIV in the bloodstream.", + "length": 44 + }, + { + "text": "'It reached drug addicts through shared needles.", + "length": 48 + }, + { + "text": "Since 1981, about 78 million people have been infected by HIV .", + "length": 63 + }, + { + "text": "Thirty-nine million people have died, according to UN estimates.", + "length": 64 + }, + { + "text": "' Since 1981, about 78 million people have been infected by HIV.", + "length": 64 + }, + { + "text": "Quammen wrote: 'It reached hemophiliacs through the blood supply.", + "length": 65 + }, + { + "text": "If Quammen is right, there is no way Dugas could have brought HIV to the US.", + "length": 77 + }, + { + "text": "1910s-20s: The hunter spreads the virus by having sex with at least one 
other person.", + "length": 85 + }, + { + "text": "He called Dugas 'Patient Zero' and hypothesized he infected at least 40 people with HIV.", + "length": 88 + }, + { + "text": "David Quammen traced the history of AIDS by examining genetic samples from humans and chimps .", + "length": 94 + }, + { + "text": "1908: A hunter is infected by an HiV-like virus after he kills and butchers a chimp in Cameroon.", + "length": 96 + }, + { + "text": "The account contradicts the theory put forth in Randy Shilts' history of AIDS, And the Band Played On.", + "length": 102 + }, + { + "text": "Antiretroviral drugs, invented in the mid-1990s, can treat infection but cannot cure it or prevent it.", + "length": 102 + }, + { + "text": "' French-speaking Haitians who were working in the Congo went home after Belgium gave up the colony in 1950.", + "length": 108 + }, + { + "text": "It gradually begins working its way down the Sangha River before reaching Kinshasa, the capital of the Congo.", + "length": 110 + }, + { + "text": "In his book, Quammen (left) theorizes that a chimp infected a hunter with a virus similar to HIV in Cameroon .", + "length": 111 + }, + { + "text": "'As evidence now shows, HIV had already arrived in North America when Gaëtan ­Dugas was a virginal adolescent.", + "length": 112 + }, + { + "text": "His led him to believe the birthplace of the epidemic was the southeastern edge of Cameroon sometime around 1908.", + "length": 113 + }, + { + "text": "Quammen added: 'Someone brought back to Haiti, along with Congolese memories, a dose of HIV-1, Group M, Subtype B.", + "length": 114 + }, + { + "text": "The virus destroys immune cells and leaves the body exposed to tuberculosis, pneumonia and other opportunistic diseases.", + "length": 120 + }, + { + "text": "After reaching the Congo, the virus spread even faster via the continued re-use of  hypodermic syringes at health clinics.", + "length": 123 + }, + { + "text": "It reached gay men… by sexual transmission, possibly 
from an initial contact between two males, an American and a Haitian.", + "length": 124 + }, + { + "text": "In his book, Shilts theorizes the virus could have been brought to the US by a gay Air Canada steward named Gaëtan Dugas.", + "length": 124 + }, + { + "text": "Quammen wrote: 'Dugas himself was infected by some other human, presumably during a sexual encounter — and not in Africa .", + "length": 124 + }, + { + "text": "1969: Shared needles at Haitian clinics also spread the virus and it is brought to the US 'in 1969, plus or minus about three years'.", + "length": 133 + }, + { + "text": "1920s-50s: Health campaigns to treat tropical diseases re-use needles multiple times and the virus is spread unknowingly via syringes.", + "length": 134 + }, + { + "text": "Treatment for a common ailment in the area at the time required 36 injections over three years and most needles were used over and over.", + "length": 137 + }, + { + "text": "1960: After Belgium gives up the Congo as a colony, French-speaking Haitians head home and bring 'a dose of HIV-1, Group M, Subtype B' with them.", + "length": 145 + }, + { + "text": "1980: A UCLA Medical Center professor notices a number of gay men suffering from pneumonia because of weakened immune systems and HIV is discovered thereafter.", + "length": 159 + }, + { + "text": "For the book, which is subtitled 'How AIDS emerged from an African forest', Quammen traced the history of AIDS by examining genetic samples from humans and chimps.", + "length": 163 + }, + { + "text": "Quammen theorizes that a hunter in the rainforest was infected with a immunodeficiency virus similar to HIV by a chimp he killed and butchered, the New York Post reported.", + "length": 173 + }, + { + "text": "A new book claims the AIDS epidemic began in a rainforest in southeastern Cameroon in 1908 and not more than 70 years later when the virus started to be recognized in the early 1980s.", + "length": 184 + }, + { + "text": "' The re-use of needles allowed 
the virus to spread in Haiti and an infected person or infected container of blood plasma likely brought AIDS to America 'in 1969, plus or minus about three years'.", + "length": 196 + }, + { + "text": "The hunter likely infected at least one other person through sex and the virus continued to make its way down the Sangha River in that matter until it reached the city of Leopoldville (now Kinshasa) in the Congo.", + "length": 213 + }, + { + "text": "Quammen wrote: 'Once the reusable needles and syringes had put the virus into enough people — say, several hundred — it wouldn't come to a dead end, it wouldn't burn out, and sexual transmission could do the rest.", + "length": 217 + }, + { + "text": "The number of people newly infected with HIV over the last year was lower than the number of HIV-positive people getting access to the medicines they need to keep AIDS at bay, according to a report by the ONE campaign.", + "length": 219 + }, + { + "text": "In his book (right), Shilts theorizes the virus could have been brought to the US by a gay Air Canada steward named Gaëtan Dugas (left) The book claims AIDS originated in Cameroon around 1908 and spread down the Sangha River before reaching Kinshasa, the capital of the Congo, sometime in the 1910s or 1920s .", + "length": 311 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.5249229669570923 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:21.808830028Z", + "first_section_created": "2025-12-23T09:36:21.810584599Z", + "last_section_published": "2025-12-23T09:36:21.810808808Z", + "all_results_received": "2025-12-23T09:36:21.874539174Z", + "output_generated": "2025-12-23T09:36:21.874720581Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:21.810584599Z", + "publish_time": 
"2025-12-23T09:36:21.810808808Z", + "first_worker_start": "2025-12-23T09:36:21.811314728Z", + "last_worker_end": "2025-12-23T09:36:21.873575Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:21.811261426Z", + "start_time": "2025-12-23T09:36:21.811314728Z", + "end_time": "2025-12-23T09:36:21.811401932Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:21.811459Z", + "start_time": "2025-12-23T09:36:21.811599Z", + "end_time": "2025-12-23T09:36:21.873575Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:21.811273527Z", + "start_time": "2025-12-23T09:36:21.811329229Z", + "end_time": "2025-12-23T09:36:21.811448934Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:21.811334629Z", + "start_time": "2025-12-23T09:36:21.811444334Z", + "end_time": "2025-12-23T09:36:21.811482735Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + 
"avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4747, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/00746ee0bd7b3645786ec2f3b60f6e41247d46f4.json b/data/output/00746ee0bd7b3645786ec2f3b60f6e41247d46f4.json new file mode 100644 index 0000000..af8c0d6 --- /dev/null +++ b/data/output/00746ee0bd7b3645786ec2f3b60f6e41247d46f4.json @@ -0,0 +1,278 @@ +{ + "file_name": "00746ee0bd7b3645786ec2f3b60f6e41247d46f4.txt", + "total_words": 529, + "top_n_words": [ + { + "word": "the", + "count": 39 + }, + { + "word": "in", + "count": 31 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "at", + "count": 11 + }, + { + "word": "will", + "count": 11 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "trip", + "count": 9 + }, + { + "word": "couple", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "uk.", + "length": 3 + }, + { + "text": "co.", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "com and holidaysplease.", + "length": 23 + }, + { + "text": "VeryFirstTo is known for its quirky holidays.", + "length": 45 + }, + { + "text": "Unassuming: The entrance to Sushi Yoshitake in Tokyo .", + "length": 54 + }, + { + "text": "Fine dining: At Restaurant Gordon Ramsay in Chelsea, London .", + "length": 61 + }, + { + "text": "Luxury: The French Laundry restaurant in Napa Valley, California .", + "length": 66 + }, + { + "text": "Holidaysplease say it is the most lavish trip it has ever organised.", + "length": 68 + }, + { + "text": "Closer to home: The Fat Duck, in Bray, 
Berkshire, has made the list .", + "length": 69 + }, + { + "text": "Contemporary: The dining at room at Michel Bras in Laguiole, France .", + "length": 69 + }, + { + "text": "The adventure takes six months and involves dinner at 109 restaurants.", + "length": 70 + }, + { + "text": "Highlight: Sushi Yoshitake in Tokyo is one of the main attractions on the trip .", + "length": 80 + }, + { + "text": "Divine: A typical dish at Per Se in New York, one of the greatest restaurants in the world .", + "length": 92 + }, + { + "text": "A donation of £1,500 will be made to charity The Prince’s Trust once the trip is purchased.", + "length": 94 + }, + { + "text": "Encompassing 12 countries, the trip has been organised by pioneering travel firms veryfirstto.", + "length": 94 + }, + { + "text": "World class: Chefs Heston Blumenthal and Masahiro Yoshitake are behind some of the restaurants on the trip .", + "length": 108 + }, + { + "text": "The belt-busting adventure, which takes six months, will be enjoyed by one couple only - costing a cool £182,000.", + "length": 114 + }, + { + "text": "It is a food lover's dream holiday - the chance to visit every single Michelin three-star restaurant around the globe.", + "length": 118 + }, + { + "text": "The couple will enjoy their gourmet adventure in luxurious style, travelling business class and staying at top hotels.", + "length": 118 + }, + { + "text": "La Pergola in Rome also features in the luxury trip, which will also include business class travel and five-star hotels .", + "length": 121 + }, + { + "text": "From Heston Blumenthal's Fat Duck in Berkshire to Lung King Heen in Hong Kong, the mouth-watering trip takes in a total of 109 of the world's finest eateries.", + "length": 158 + }, + { + "text": "Wallet-busting: The gastronomic adventure will cost £182,000 and takes in 12 countries, including America, where the lucky couple will visit Per Se in New York .", + "length": 162 + }, + { + "text": "Lavish: In a gastronomic trip of a 
lifetime, one lucky couple will get to visit all 109 Michelin three-star restaurants around the globe, including Meurice in Paris, pictured .", + "length": 176 + }, + { + "text": "Visiting approximately one restaurant every other day, the couple will have time to savour their spectacular culinary experiences while taking in the local sites of each destination.", + "length": 182 + }, + { + "text": "It recently launched a trip dogs, which got to stay at the Paw Seasons hotel, while it also created a vacation encompassing every World Heritage site over 2 years at a cost of £960,000 per couple.", + "length": 197 + }, + { + "text": "Other restaurants on the list include Sushi Yoshitake in Tokyo, Per Se in New York and Meurice in Paris, which have all been awarded the coveted three stars for their supreme fine dining and ambience.", + "length": 200 + }, + { + "text": "The couple will get to enjoy dishes such as Salmon Poached in a Liquorice Gel at The Fat Duck in Bray, Berkshire and Arzak’s pineapple bubbles in the mountainous Basque country, as well as the delicacies of legendary chef Alain Ducasse in Monaco.", + "length": 248 + }, + { + "text": "Not only will the lucky couple get to eat at the finest restaurants in the world, they will also travel business class and stay at the globe's finest hotels, including Trump International in New York, Conrad in Tokyo, Hotel De Paris in Monte Carlo and Claridges in London.", + "length": 272 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.43510740995407104 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:22.310507991Z", + "first_section_created": "2025-12-23T09:36:22.310833405Z", + "last_section_published": "2025-12-23T09:36:22.311032413Z", + "all_results_received": "2025-12-23T09:36:22.378721238Z", + "output_generated": "2025-12-23T09:36:22.378879244Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 67, + 
"aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:22.310833405Z", + "publish_time": "2025-12-23T09:36:22.311032413Z", + "first_worker_start": "2025-12-23T09:36:22.311485631Z", + "last_worker_end": "2025-12-23T09:36:22.377809Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:22.311434529Z", + "start_time": "2025-12-23T09:36:22.311504532Z", + "end_time": "2025-12-23T09:36:22.311564734Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:22.311694Z", + "start_time": "2025-12-23T09:36:22.31184Z", + "end_time": "2025-12-23T09:36:22.377809Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:22.31146203Z", + "start_time": "2025-12-23T09:36:22.311534833Z", + "end_time": "2025-12-23T09:36:22.311609636Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:22.311417228Z", + "start_time": "2025-12-23T09:36:22.311485631Z", + "end_time": "2025-12-23T09:36:22.311516532Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3116, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/0074ce973a86e2c417990950a9c9d24b4699f858.json b/data/output/0074ce973a86e2c417990950a9c9d24b4699f858.json new file mode 100644 index 0000000..ef1c94f --- /dev/null +++ b/data/output/0074ce973a86e2c417990950a9c9d24b4699f858.json @@ -0,0 +1,258 @@ +{ + "file_name": "0074ce973a86e2c417990950a9c9d24b4699f858.txt", + "total_words": 628, + "top_n_words": [ + { + "word": "the", + "count": 37 + }, + { + "word": "to", + "count": 22 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "have", + "count": 10 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "has", + "count": 9 + }, + { + "word": "for", + "count": 8 + }, + { + "word": "at", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "7m.", + "length": 3 + }, + { + "text": "Who are football's top January transfer targets?", + "length": 48 + }, + { + "text": "7 million bid for Manchester City's Argentine striker.", + "length": 54 + }, + { + "text": "He told the official Chelsea website: \"Chelsea are a massive club.", + "length": 66 + }, + { + "text": "I have had several opportunities to leave but I have always stayed.", + "length": 67 + }, + { + "text": "\"Competition will be tight for him but we brought him in to become better as a team.", + "length": 84 + }, + { + "text": "They look to win trophies season in season out and it is 
a big opportunity for me to be a part of that.", + "length": 103 + }, + { + "text": "Samba issued a statement saying: \"In my five years at Blackburn I have always given 100% in every game I have played.", + "length": 117 + }, + { + "text": "\" In other transfer news, Barcelona have announced that French defender Eric Abidal has signed a new deal with the club.", + "length": 120 + }, + { + "text": "Abidal has made 177 appearances in four years with Barca, winning the Champions League twice and the Spanish La Liga three times.", + "length": 129 + }, + { + "text": "\"I have decided now is the right time for me to pursue a new challenge and I have asked the club to respect my decision and allow me to leave.", + "length": 142 + }, + { + "text": "Meanwhile, the Carlos Tevez transfer saga has taken a new twist after Inter Milan president Massimo Moratti confirmed the Italian club had made a $31.", + "length": 150 + }, + { + "text": "\" Cahill joined Bolton from Aston Villa in January 2008 and developed into one of England's top defenders during his four years at the Reebok Stadium.", + "length": 150 + }, + { + "text": "(CNN) -- Chelsea have completed the signing of England international defender Gary Cahill from Premier League rivals Bolton Wanderers for a fee of $10.", + "length": 151 + }, + { + "text": "However, he had already indicated he was not prepared to sign a new contract, meaning Wanderers were forced to sell him or lose him for free at the end of the season.", + "length": 166 + }, + { + "text": "Speaking to reporters after Inter's 1-0 victory over city rivals AC Milan, Moratti said: \"Our offer is 25 million euros -- now it depends on them whether they accept it or not.", + "length": 176 + }, + { + "text": "The 32-year-old, who had been linked with a move away from the Nou Camp, is now contracted to the European champions until June 2013, with an option to extend the deal until 2015.", + "length": 179 + }, + { + "text": "The 26-year-old finalized his 
protracted move on Monday after agreeing personal terms and passing a medical, making it the biggest English transfer so far during the January window.", + "length": 181 + }, + { + "text": "The Congolese international has already been the subject of a rejected bid from QPR, while title-chasing Tottenham and French big spenders PSG have also been linked with the player.", + "length": 181 + }, + { + "text": "Elsewhere in the Premier League, Blackburn central defender Chris Samba has handed in a written transfer request, despite Rovers manager Steve Kean saying the player was not for sale.", + "length": 183 + }, + { + "text": "Cahill has signed a five-and-a-half year contract with the London club, despite doubts beginning to surface about the deal due to the length of time negotiations over his financial terms took.", + "length": 192 + }, + { + "text": "Speaking on Saturday about Cahill's impending arrival, Chelsea manager Andre Villas Boas told reporters: \"He has good technical abilities which is important in the way we want to play and to implement our philosophy.", + "length": 216 + }, + { + "text": "\"Opportunities like this, you just can't turn down,\" added Cahill, who has won seven England caps and will be battling with Brazilian David Luiz to partner England captain John Terry at the heart of the Chelsea defense.", + "length": 219 + }, + { + "text": "\" Inter have emerged as favorites to sign Tevez, after Milan pulled out of the race last week when their plan to sell Alexandre Pato to Paris St Germain fell through following the Brazilian's decision to stay at the San Siro.", + "length": 225 + }, + { + "text": "Ironically big-spending PSG now seem the only realistic challengers for Tevez, who has fallen out of favor at City after refusing to come off the substitutes' bench during the 3-1 Champions League defeat at Bayern Munich earlier in the season.", + "length": 243 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.44317519664764404 + }, + 
"name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:22.811786972Z", + "first_section_created": "2025-12-23T09:36:22.812154687Z", + "last_section_published": "2025-12-23T09:36:22.812348395Z", + "all_results_received": "2025-12-23T09:36:22.8721028Z", + "output_generated": "2025-12-23T09:36:22.872271107Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:22.812154687Z", + "publish_time": "2025-12-23T09:36:22.812348395Z", + "first_worker_start": "2025-12-23T09:36:22.812915617Z", + "last_worker_end": "2025-12-23T09:36:22.871181Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:22.812928318Z", + "start_time": "2025-12-23T09:36:22.81298252Z", + "end_time": "2025-12-23T09:36:22.813045323Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:22.81303Z", + "start_time": "2025-12-23T09:36:22.813172Z", + "end_time": "2025-12-23T09:36:22.871181Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 58 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:22.812844514Z", + "start_time": "2025-12-23T09:36:22.812929118Z", + "end_time": "2025-12-23T09:36:22.813021522Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:22.812838014Z", + "start_time": "2025-12-23T09:36:22.812915617Z", + "end_time": "2025-12-23T09:36:22.812939018Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 58, + "min_processing_ms": 58, + "max_processing_ms": 58, + "avg_processing_ms": 58, + "median_processing_ms": 58, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3564, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git a/data/output/0074e6c0d6ecfe9155840fe1b9710858f0724b3e.json b/data/output/0074e6c0d6ecfe9155840fe1b9710858f0724b3e.json new file mode 100644 index 0000000..8ff8457 --- /dev/null +++ b/data/output/0074e6c0d6ecfe9155840fe1b9710858f0724b3e.json @@ -0,0 +1,194 @@ +{ + "file_name": "0074e6c0d6ecfe9155840fe1b9710858f0724b3e.txt", + "total_words": 146, + "top_n_words": [ + { + "word": "the", + "count": 10 + }, + { + "word": "bendtner", + "count": 6 + }, + { + "word": "of", + "count": 5 + }, + { + "word": "to", + "count": 5 + }, + { + "word": "a", + "count": 3 + }, + { + "word": "arsenal", + "count": 3 + }, + { + "word": "from", + "count": 3 + }, + { + "word": "has", + "count": 3 + }, + { + "word": "is", + "count": 3 + }, + { + "word": "summer", + "count": 3 + } + ], + "sorted_sentences": [ + { + "text": "Nicklas Bendtner is taking full advantage of his extended summer break.", + "length": 71 + }, + { + "text": "VIDEO Scroll down to watch Bendtner's girlfriend 
take the mick out of him .", + "length": 75 + }, + { + "text": "Bizarre: Bendtner goes to extreme measures to protect himself from the sun .", + "length": 76 + }, + { + "text": "He has attracted interest from Eintracht Frankfurt but wants to stay in the Premier League.", + "length": 91 + }, + { + "text": "Frustrating: Bendtner's career has stalled somewhat significantly over the last couple of years .", + "length": 97 + }, + { + "text": "Linked with Crystal Palace and Aston Villa this summer, Bendtner scored just twice for Arsenal last season.", + "length": 107 + }, + { + "text": "The former Arsenal man is dating Julie Zangengerg, and we can only assume Julie is the owner of the bra in question.", + "length": 116 + }, + { + "text": "Bendtner, protecting his dignity with a white bra, captioned the photo saying: ‘Remember to protect yourself from the sun’.", + "length": 127 + }, + { + "text": "Having left Arsenal this summer, the Danish striker has posted a photo on Instagram of himself getting, literally, a full-on tan.", + "length": 129 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.725196897983551 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:23.31251603Z", + "first_section_created": "2025-12-23T09:36:23.312859144Z", + "last_section_published": "2025-12-23T09:36:23.313058852Z", + "all_results_received": "2025-12-23T09:36:23.379664434Z", + "output_generated": "2025-12-23T09:36:23.379809539Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 66, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:23.312859144Z", + "publish_time": "2025-12-23T09:36:23.313058852Z", + "first_worker_start": "2025-12-23T09:36:23.313532471Z", + "last_worker_end": "2025-12-23T09:36:23.378526Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + 
"receive_time": "2025-12-23T09:36:23.313549172Z", + "start_time": "2025-12-23T09:36:23.313603074Z", + "end_time": "2025-12-23T09:36:23.313622175Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:23.313777Z", + "start_time": "2025-12-23T09:36:23.313922Z", + "end_time": "2025-12-23T09:36:23.378526Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:23.31349977Z", + "start_time": "2025-12-23T09:36:23.313553872Z", + "end_time": "2025-12-23T09:36:23.313589573Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:23.313473469Z", + "start_time": "2025-12-23T09:36:23.313532471Z", + "end_time": "2025-12-23T09:36:23.313540872Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 897, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/0075033bf46020cb048da513b1482f84933e80cc.json b/data/output/0075033bf46020cb048da513b1482f84933e80cc.json new file mode 100644 index 0000000..55582b9 --- /dev/null +++ b/data/output/0075033bf46020cb048da513b1482f84933e80cc.json @@ -0,0 +1,318 @@ +{ + "file_name": "0075033bf46020cb048da513b1482f84933e80cc.txt", + "total_words": 702, + "top_n_words": [ + { + "word": "the", + "count": 39 + }, + { + "word": "a", + "count": 32 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "france", + "count": 15 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "she", + "count": 13 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "trevena", + "count": 11 + }, + { + "word": "her", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "She must pay Miss Trevena £100 in compensation.", + "length": 48 + }, + { + "text": "He was given a three-month curfew with £145 in costs.", + "length": 54 + }, + { + "text": "‘Since that day I have felt awful and have not stopped crying.", + "length": 64 + }, + { + "text": "‘The video shows that she was laughing as Leah was protesting.", + "length": 64 + }, + { + "text": "‘I felt my stomach drop and I felt sick,’ she told officers.", + "length": 64 + }, + { + "text": "’ Three days later, France posted the video in full on Facebook.", + "length": 66 + }, + { + "text": "'They don’t think about the consequences of some of their actions.", + "length": 68 + }, + { + "text": "For someone of Miss France’s age it is what they are brought up with.", + "length": 71 + }, + { + "text": "France herself then posted the video in full onto her own Facebook page .", + "length": 73 + }, + { + "text": "’ A specific offence relating to the crime is being discussed by Parliament.", + "length": 78 + }, 
+ { + "text": "Following the case Miss Trevena said: ‘I’m pretty disappointed at the sentence.", + "length": 83 + }, + { + "text": "For this she was given a 7pm to 7am curfew and told to pay a further £210 in costs.", + "length": 84 + }, + { + "text": "When Miss Trevena protested, 18-year-old France promised she would delete the video.", + "length": 84 + }, + { + "text": "’ The incident happened in October after the pair had been out together in Newcastle.", + "length": 87 + }, + { + "text": "Later she sent the video to a friend, Charles Hall, asking him to put a screen shot online.", + "length": 91 + }, + { + "text": "‘Prosecutors are being asked specifically to consider the impact on the victims involved.", + "length": 91 + }, + { + "text": "France, of Macclesfield, Cheshire, admitted an offence under the Malicious Communications Act.", + "length": 94 + }, + { + "text": "Defence lawyer John Gallagher, said: ‘This has all the hallmarks of why I hate social media.", + "length": 94 + }, + { + "text": "But a month later the pair fell out and France posted the 15-second film on her own Facebook page.", + "length": 98 + }, + { + "text": "’ The incident occurred last October after the pair had been out together in Newcastle-upon-Tyne.", + "length": 99 + }, + { + "text": "Hall, of Bollington, Cheshire, had pleaded guilty at an earlier hearing to posting the screen shot.", + "length": 99 + }, + { + "text": "'Leah didn’t give permission to be filmed and asked the defendant to immediately delete the video.", + "length": 100 + }, + { + "text": "The 21-year-old did so on November 14, with the comment: ‘I’ve got the video if anyone else wants it.", + "length": 105 + }, + { + "text": "They met two men and later went back to their digs where France took the sex footage with her mobile phone.", + "length": 107 + }, + { + "text": "She has already posted messages on social media saying she is celebrating because she isn’t going to jail.", + "length": 108 + }, + { + "text": "Danika 
France filmed Leah Trevena without her consent while she was in bed with a man following a night out.", + "length": 108 + }, + { + "text": "A teenager posted a revenge porn film on Facebook of her best friend having sex, a court was told yesterday.", + "length": 108 + }, + { + "text": "Magistrates in the town yesterday imposed a 12-month community order with an order to wear an electronic tag.", + "length": 109 + }, + { + "text": "Debbie Byrne, prosecuting, said: ‘The defendant opened the bedroom door and filmed her friend in bed with a man.", + "length": 114 + }, + { + "text": "Leah Trevena from Macclesfield, Cheshire, said she was upset at the lenient sentence her former friend has been given .", + "length": 119 + }, + { + "text": "Miss Trevena, a 19-year-old college student, reported her to police, saying she felt ‘hurt, humiliated and suicidal’.", + "length": 121 + }, + { + "text": "Magistrates in Macclesfield heard how France told Miss Trevena (both pictured) to 'jump off a bridge and do everyone a favour .", + "length": 127 + }, + { + "text": "But just one month later the pair fell out and a friend posted a screenshot of the illicit film to Miss Trevena's Facebook page.", + "length": 128 + }, + { + "text": "Magistrates said it was a 'very serious matter' but said France (pictured outside court) pleaded guilty at the first opportunity .", + "length": 130 + }, + { + "text": "France, who is on benefits after losing her job as a carer, also admitted a separate offence of assaulting a girl during a night out.", + "length": 133 + }, + { + "text": "A spokesman from the Crown Prosecution Service said: ‘No one should have to suffer the hurt and humiliation of revenge pornography.", + "length": 133 + }, + { + "text": "Danika France, 18, broke into a bedroom while friend Leah Trevena, 19, was in there with a man following a night out and filmed her .", + "length": 133 + }, + { + "text": "France (right) who is currently on benefits also admitted an unrelated offence of 
an assault by beating on another girl during a night out in Macclesfield .", + "length": 156 + }, + { + "text": "’ France failed to do so and soon their friendship fell apart to the extent that she told Miss Trevena to ‘jump off a bridge and do everyone a favour’.", + "length": 157 + }, + { + "text": "The court heard Miss Trevena began to distance herself from France over the incident and the pair fell out over a rumour circulating amongst their friendship group .", + "length": 165 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.47100937366485596 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:23.813763809Z", + "first_section_created": "2025-12-23T09:36:23.815032961Z", + "last_section_published": "2025-12-23T09:36:23.81526447Z", + "all_results_received": "2025-12-23T09:36:23.876386931Z", + "output_generated": "2025-12-23T09:36:23.876561138Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:23.815032961Z", + "publish_time": "2025-12-23T09:36:23.81526447Z", + "first_worker_start": "2025-12-23T09:36:23.815744889Z", + "last_worker_end": "2025-12-23T09:36:23.875445Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:23.815698287Z", + "start_time": "2025-12-23T09:36:23.81576059Z", + "end_time": "2025-12-23T09:36:23.815823192Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:23.81597Z", + "start_time": "2025-12-23T09:36:23.816113Z", + "end_time": "2025-12-23T09:36:23.875445Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:23.815686487Z", + "start_time": 
"2025-12-23T09:36:23.815744889Z", + "end_time": "2025-12-23T09:36:23.815836893Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:23.815665786Z", + "start_time": "2025-12-23T09:36:23.81575229Z", + "end_time": "2025-12-23T09:36:23.815794391Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4053, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/00755c013fa5dae80958e5b5fb3c4afe6ab0db66.json b/data/output/00755c013fa5dae80958e5b5fb3c4afe6ab0db66.json new file mode 100644 index 0000000..ca8da07 --- /dev/null +++ b/data/output/00755c013fa5dae80958e5b5fb3c4afe6ab0db66.json @@ -0,0 +1,222 @@ +{ + "file_name": "00755c013fa5dae80958e5b5fb3c4afe6ab0db66.txt", + "total_words": 330, + "top_n_words": [ + { + "word": 
"the", + "count": 21 + }, + { + "word": "and", + "count": 10 + }, + { + "word": "for", + "count": 9 + }, + { + "word": "s", + "count": 8 + }, + { + "word": "of", + "count": 7 + }, + { + "word": "to", + "count": 7 + }, + { + "word": "will", + "count": 7 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "airport", + "count": 6 + }, + { + "word": "been", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "5bn) project has not been released.", + "length": 35 + }, + { + "text": "Although a completion date for the £800m ($1.", + "length": 46 + }, + { + "text": "The dome will also offer travellers luxury shops and leisure attractions.", + "length": 73 + }, + { + "text": "’ New look: How Changi Airport in Singapore will look according to architects .", + "length": 81 + }, + { + "text": "Bio dome: The glass doughnut-shaped structure will be made out of glass and steel .", + "length": 83 + }, + { + "text": "Lush interior: The new design for the airport in Singapore features a vast garden area .", + "length": 88 + }, + { + "text": "It’s been awarded the coveted title of best airport in the world for the past seven years running.", + "length": 100 + }, + { + "text": "Safdie, who is renowned for his eco-topic buildings, was behind Singapore’s Marina Bay Sands Resort.", + "length": 102 + }, + { + "text": "Airport of the future: The £800m designs have been unveiled, although no completion date has been announced .", + "length": 110 + }, + { + "text": "And judging from Singapore’s Changi Airport’s plans for expansion, it’s not prepared to give up the top spot.", + "length": 115 + }, + { + "text": "Boasting a lush indoor garden area and a cascading waterfall, renderings for the futuristic bio-dome have been revealed by architect Moshe Safdie.", + "length": 146 + }, + { + "text": "‘For Singaporeans, it will be an exciting world-class destination right here at home, where they can relax and enjoy with their loved ones, again and again.", + "length": 158 + }, + { + 
"text": "Shaped like a doughnut, the five-storey glass and steel structure – codenamed Project Jewel – will connect the airports existing three terminals via all-glass walkways.", + "length": 172 + }, + { + "text": "Project Jewel’s gardens will also include green walls to offset the emissions from the planes overhead, and it is expected to serve nearly 85 million passengers by the time it is completed.", + "length": 191 + }, + { + "text": "Mr Lee Seow Hiang, CAG’s Chief Executive Officer, said: ‘We are very excited about this opportunity to create at Changi Airport an iconic global attraction that will capture the hearts of both tourists and Singaporeans.", + "length": 223 + }, + { + "text": "‘For tourists, we envisage Project Jewel to be a must-visit Singapore attraction, located strategically at the doorstep of one of the world’s busiest air hubs, and an extension of the Changi brand promise that many travellers worldwide have come to know us for.", + "length": 265 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.40717366337776184 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:24.316063931Z", + "first_section_created": "2025-12-23T09:36:24.317447987Z", + "last_section_published": "2025-12-23T09:36:24.317633094Z", + "all_results_received": "2025-12-23T09:36:24.38309423Z", + "output_generated": "2025-12-23T09:36:24.383237735Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:24.317447987Z", + "publish_time": "2025-12-23T09:36:24.317633094Z", + "first_worker_start": "2025-12-23T09:36:24.318094213Z", + "last_worker_end": "2025-12-23T09:36:24.382227Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:24.318039911Z", + "start_time": 
"2025-12-23T09:36:24.318094213Z", + "end_time": "2025-12-23T09:36:24.318130414Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:24.318338Z", + "start_time": "2025-12-23T09:36:24.31848Z", + "end_time": "2025-12-23T09:36:24.382227Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:24.318127814Z", + "start_time": "2025-12-23T09:36:24.318194817Z", + "end_time": "2025-12-23T09:36:24.31826462Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:24.318078412Z", + "start_time": "2025-12-23T09:36:24.318122614Z", + "end_time": "2025-12-23T09:36:24.318140615Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + 
"average_section_size": 2001, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/007572814d0a39a1dfc9d2f8af8f71ddd5b23821.json b/data/output/007572814d0a39a1dfc9d2f8af8f71ddd5b23821.json new file mode 100644 index 0000000..93c9ff2 --- /dev/null +++ b/data/output/007572814d0a39a1dfc9d2f8af8f71ddd5b23821.json @@ -0,0 +1,282 @@ +{ + "file_name": "007572814d0a39a1dfc9d2f8af8f71ddd5b23821.txt", + "total_words": 537, + "top_n_words": [ + { + "word": "the", + "count": 30 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "ferguson", + "count": 11 + }, + { + "word": "to", + "count": 11 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "wenger", + "count": 10 + }, + { + "word": "at", + "count": 9 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "for", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "99 .", + "length": 4 + }, + { + "text": "Price: £8.", + "length": 11 + }, + { + "text": "*Louis van Gaal is formidable...", + "length": 32 + }, + { + "text": "but dressing-room was out of bounds .", + "length": 37 + }, + { + "text": "'Everyone was happy for him winning the FA Cup.", + "length": 47 + }, + { + "text": "and he and Ryan Giggs can learn from each other .", + "length": 49 + }, + { + "text": "He later admitted he did not know who had thrown it.", + "length": 52 + }, + { + "text": "You won’t see anyone manage one club for 26 years.", + "length": 52 + }, + { + "text": "I have my doubts, but he’s having a good crack at it.", + "length": 55 + }, + { + "text": "Ferguson wrote: 'Who is to say that Arsene will not beat my record?", + "length": 67 + }, + { + "text": "* I spoke to Rio Ferdinand and Patrice Evra about their Manchester United futures...", + "length": 84 + }, + { + "text": "* I had nothing to do with David Moyes' sacking - I found out by reading the paper .", + "length": 85 + }, + { + "text": "The incident happened when United ended Arsenal's 49-match 
unbeaten Premier League run.", + "length": 87 + }, + { + "text": "I must say, though, that Arsene Wenger’s reign at Arsenal deserves a special mention.", + "length": 87 + }, + { + "text": "Wenger (right) at Highbury in 1996 after sealing the signing of French striker Nicolas Anelka .", + "length": 95 + }, + { + "text": "Ferguson (right) and Wenger (left) chat during a UEFA coaches forum in Nyon in September 2009 .", + "length": 95 + }, + { + "text": "* I tried to convince David Moyes to keep Mike Phelan as Manchester United assistant manager .", + "length": 95 + }, + { + "text": "Sir Alex Ferguson (right) shouts towards Wenger (left) during a Premier League clash in March 2004 .", + "length": 100 + }, + { + "text": "Ferguson's former player Steve Bruce, now the manager of Hull, looks dejected after the FA Cup final defeat .", + "length": 109 + }, + { + "text": "Arsene Wenger (right) lifts the trophy after Arsenal beat Hull City 3-2 in the FA Cup final earlier this year .", + "length": 111 + }, + { + "text": "In a new updated version of My Autobiography, Ferguson wrote: 'The model I represented has passed into history.", + "length": 111 + }, + { + "text": "The animosity led to a Metropolitan Police commander urging the two bosses to end their feud the following year.", + "length": 112 + }, + { + "text": "Ferguson had a slice of pizza thrown at him after arguments in the tunnel between players and staff from the two teams.", + "length": 119 + }, + { + "text": "Wenger is currently the longest-serving manager in the Premier League, having been in charge of the Gunners for 18 years.", + "length": 121 + }, + { + "text": "The rift was eventually healed in 2009 when Wenger invited the United staff into his office at a Champions League semi-final at the Emirates.", + "length": 141 + }, + { + "text": "' Ferguson and Wenger had a long-lasting rivalry of more than 16 years, with one of the most famous moments being the 'Pizzagate' incident in 2004.", + "length": 147 + }, + { + 
"text": "The front cover of Ferguson's autobiography, which will be released on Thursday after being updated to include chapters on his life after retirement from management.", + "length": 165 + }, + { + "text": "' Wenger would have to remain in charge at the Emirates until April 2023 to surpass Ferguson's United reign - something the former Old Trafford boss thinks is unlikely.", + "length": 168 + }, + { + "text": "And Ferguson admitted he was happy for the Frenchman when Arsenal ended a nine-year trophy drought in May with a 3-2 victory against Hull City in the FA Cup final at Wembley.", + "length": 174 + }, + { + "text": "Sir Alex Ferguson has expressed his happiness at seeing old rival Arsene Wenger win the FA Cup last season - but doubts that the Arsenal manager will surpass his 26-and-a-half-year reign.", + "length": 187 + }, + { + "text": "I was hoping Steve Bruce would win it for Hull because he was one of my players; but you also had to have a special feeling for Arsene, given the pressure he was under for not winning a trophy for nine years.", + "length": 208 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.3591618537902832 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:24.818401554Z", + "first_section_created": "2025-12-23T09:36:24.818758469Z", + "last_section_published": "2025-12-23T09:36:24.818938176Z", + "all_results_received": "2025-12-23T09:36:24.881045276Z", + "output_generated": "2025-12-23T09:36:24.881204082Z", + "total_processing_time_ms": 62, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:24.818758469Z", + "publish_time": "2025-12-23T09:36:24.818938176Z", + "first_worker_start": "2025-12-23T09:36:24.819490298Z", + "last_worker_end": "2025-12-23T09:36:24.880071Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + 
"worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:24.819455697Z", + "start_time": "2025-12-23T09:36:24.819514099Z", + "end_time": "2025-12-23T09:36:24.819583702Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:24.819692Z", + "start_time": "2025-12-23T09:36:24.819852Z", + "end_time": "2025-12-23T09:36:24.880071Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:24.819463497Z", + "start_time": "2025-12-23T09:36:24.819516099Z", + "end_time": "2025-12-23T09:36:24.819669305Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:24.819431296Z", + "start_time": "2025-12-23T09:36:24.819490298Z", + "end_time": "2025-12-23T09:36:24.819519399Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + 
"median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3034, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/0075795e722ebab52dca94aa77e57d5697f3b5bf.json b/data/output/0075795e722ebab52dca94aa77e57d5697f3b5bf.json new file mode 100644 index 0000000..195c0bc --- /dev/null +++ b/data/output/0075795e722ebab52dca94aa77e57d5697f3b5bf.json @@ -0,0 +1,362 @@ +{ + "file_name": "0075795e722ebab52dca94aa77e57d5697f3b5bf.txt", + "total_words": 751, + "top_n_words": [ + { + "word": "the", + "count": 67 + }, + { + "word": "of", + "count": 27 + }, + { + "word": "and", + "count": 18 + }, + { + "word": "oil", + "count": 18 + }, + { + "word": "is", + "count": 16 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "to", + "count": 14 + }, + { + "word": "are", + "count": 10 + }, + { + "word": "for", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "Gov.", + "length": 4 + }, + { + "text": "Who knows?", + "length": 10 + }, + { + "text": "There is no clean water.", + "length": 24 + }, + { + "text": "Who is telling the truth?", + "length": 25 + }, + { + "text": "Electricity is sporadic at best.", + "length": 32 + }, + { + "text": "This is life in the Niger Delta.", + "length": 32 + }, + { + "text": "The other is the Nigerian government.", + "length": 37 + }, + { + "text": "Either way, the creeks are blackened.", + "length": 37 + }, + { + "text": "But to most Nigerians -- oil is a curse.", + "length": 40 + }, + { + "text": "Basic infrastructure is severely lacking.", + "length": 41 + }, + { + "text": "Oil companies are only part of the equation.", + "length": 44 + }, + { + "text": "\"There's a lot of improvement,\" Amaechi said.", + "length": 45 + }, + { + "text": "Medical and educational services are limited.", + "length": 45 + }, + { + "text": "See environmental battle lines for \"Planet in Peril\" » .", + 
"length": 57 + }, + { + "text": "PORT HARCOURT, Nigeria (CNN) -- Trash litters its cities.", + "length": 58 + }, + { + "text": "Somehow, little money actually reaches its intended destination.", + "length": 64 + }, + { + "text": "It has provoked an environmental disaster of monstrous proportions.", + "length": 67 + }, + { + "text": "The battle is over oil -- one of the world's most valuable resources.", + "length": 69 + }, + { + "text": "MEND, formed in 2005, said it has more than 30 camps throughout Nigeria.", + "length": 72 + }, + { + "text": "Kidnappings for ransom, robberies and even murder happen with regularity.", + "length": 73 + }, + { + "text": "The August spill was a result of a leak from an old pipeline that had corroded.", + "length": 79 + }, + { + "text": "They are still happening and the consequences are nothing short of devastating.", + "length": 79 + }, + { + "text": "Toxicity overpowers the air and a sense of lifelessness pervades the landscape.", + "length": 79 + }, + { + "text": "Nigeria's federal government and oil companies split oil profits roughly 60-40.", + "length": 79 + }, + { + "text": "Lisa Ling travels to secret location to meet notorious Nigerian militant group » .", + "length": 83 + }, + { + "text": "Members are armed with high-tech weaponry they said was obtained from \"foreign sources.", + "length": 87 + }, + { + "text": "These are not conditions that should plague one of the richest oil states in the world.", + "length": 87 + }, + { + "text": "Collecting food becomes impossible when a spill happens, like one that occurred in August.", + "length": 90 + }, + { + "text": "Conversely, the average Nigerian has suffered as a result of the country's oil prosperity.", + "length": 90 + }, + { + "text": "The waterways and mangroves are blanketed in thick brown oil sludge that goes on for miles.", + "length": 91 + }, + { + "text": "They say they are at war against the Nigerian military and the oil companies operating there.", + "length": 93 + 
}, + { + "text": "Once the leak was reported, the company said it was denied access to the site by the community.", + "length": 95 + }, + { + "text": "Yet, there is no international outcry and rarely are the spills reported, even to most Nigerians.", + "length": 97 + }, + { + "text": "Communities along the Niger Delta have lived off subsistence fishing and agriculture for decades.", + "length": 97 + }, + { + "text": "The money is then supposed to make its way down to the local governments to fund various projects.", + "length": 98 + }, + { + "text": "\" Hundreds of people have been killed on both sides and countless oil workers have been kidnapped.", + "length": 98 + }, + { + "text": "The biggest and most powerful armed group is the Movement for the Emancipation of the Niger Delta, or MEND.", + "length": 107 + }, + { + "text": "\" Over the last few years, a culture of militancy and violence has arisen in the absence of jobs and services.", + "length": 110 + }, + { + "text": "\"Planet in Peril\" met in a secret location with members of the Movement for the Emancipation of the Niger Delta.", + "length": 112 + }, + { + "text": "Hundreds of billions of dollars has been made from the Niger Delta's oil reserves and many people have gotten very rich.", + "length": 120 + }, + { + "text": "Leaders of the village deny that, and the finger-pointing between the two sides is nothing new -- there is no love lost here.", + "length": 125 + }, + { + "text": "Nigeria's own corruption agency estimates between $300 billion to $400 billion has been stolen or wasted over the last 50 years.", + "length": 128 + }, + { + "text": "It took the oil company three months to clamp the leak, but the company said it wasn't reported for a full month after it began.", + "length": 128 + }, + { + "text": "Many say it will take 10-15 years for the area to be free of contamination -- if the cleanup effort commences in a timely manner.", + "length": 129 + }, + { + "text": "Over the years, MEND's attacks 
on oil pipelines have halted oil production and, therefore, raised the price of oil around the world.", + "length": 132 + }, + { + "text": "\"The work being done by the corruption agency and the federal government has somehow been able to control the level of corruption in government.", + "length": 144 + }, + { + "text": "They demand oil profits be distributed to average Nigerians of the Niger Delta and said they will not stop their attacks until their objectives have been fulfilled.", + "length": 164 + }, + { + "text": "Transparency International, a global organization intent on stamping out corruption, has consistently rated Nigeria's government one of the most corrupt in the world.", + "length": 166 + }, + { + "text": "Rotimi Amaechi of Rivers state, one of the largest oil producers of Nigeria's 36 states, acknowledges past problems with corruption, but thinks progress is being made.", + "length": 167 + }, + { + "text": "The United States Agency for International Development says more than 70 percent of the country lives on less than a dollar a day -- the population is among the 20 poorest in the world.", + "length": 185 + }, + { + "text": "Since the 1970s, the United Nations estimates there have been more than 6,000 oil spills in the Niger Delta -- that is equal to more than 10 times the amount spilled from the Exxon Valdez in 1989.", + "length": 196 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8658036589622498 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:25.319715736Z", + "first_section_created": "2025-12-23T09:36:25.321649814Z", + "last_section_published": "2025-12-23T09:36:25.321855922Z", + "all_results_received": "2025-12-23T09:36:25.383595308Z", + "output_generated": "2025-12-23T09:36:25.383787415Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + 
"creation_time": "2025-12-23T09:36:25.321649814Z", + "publish_time": "2025-12-23T09:36:25.321855922Z", + "first_worker_start": "2025-12-23T09:36:25.322354542Z", + "last_worker_end": "2025-12-23T09:36:25.38239Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:25.322336742Z", + "start_time": "2025-12-23T09:36:25.322404144Z", + "end_time": "2025-12-23T09:36:25.322481547Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:25.322503Z", + "start_time": "2025-12-23T09:36:25.32265Z", + "end_time": "2025-12-23T09:36:25.38239Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:25.32230064Z", + "start_time": "2025-12-23T09:36:25.322427845Z", + "end_time": "2025-12-23T09:36:25.322514649Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:25.322257738Z", + "start_time": "2025-12-23T09:36:25.322354542Z", + "end_time": "2025-12-23T09:36:25.322393944Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4462, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/00759239d90551d978bbe5298d344fc0c1317dff.json b/data/output/00759239d90551d978bbe5298d344fc0c1317dff.json new file mode 100644 index 0000000..1c574b5 --- /dev/null +++ b/data/output/00759239d90551d978bbe5298d344fc0c1317dff.json @@ -0,0 +1,214 @@ +{ + "file_name": "00759239d90551d978bbe5298d344fc0c1317dff.txt", + "total_words": 274, + "top_n_words": [ + { + "word": "in", + "count": 13 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "a", + "count": 8 + }, + { + "word": "are", + "count": 8 + }, + { + "word": "s", + "count": 8 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "to", + "count": 7 + }, + { + "word": "chances", + "count": 6 + }, + { + "word": "pivit", + "count": 4 + }, + { + "word": "republicans", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "Mark Pryor in Arkansas, who's down to 2%.", + "length": 41 + }, + { + "text": "Pivit also offers bad news for Democratic Sen.", + "length": 46 + }, + { + "text": "In that race, Pivit puts Ernst's chances at 88%.", + "length": 48 + }, + { + "text": "Jeanne Shaheen's chances of beating Scott Brown are up to 81%.", + "length": 62 + }, + { + "text": "Mark Udall in Colorado, whose chances are at just 6%, and Sen.", + "length": 62 + }, + { + "text": "Republicans need a net gain of six seats to capture a majority.", + "length": 63 + }, + { + "text": "Republicans, meanwhile, look to have put Kentucky's Senate race out of reach.", + "length": 77 + }, + { + "text": "Pivit's 
analysis suggests Democrats are in better shape in North Carolina, where Sen.", + "length": 85 + }, + { + "text": "Kay Hagan has a 69% chance of holding off Republican challenger Thom Tillis, and in New Hampshire, where Sen.", + "length": 109 + }, + { + "text": "Senate GOP leader Mitch McConnell's chances of surviving a challenge from Democrat Alison Lundergan Grimes are up to 97%.", + "length": 121 + }, + { + "text": "Pick-ups in Montana, South Dakota and West Virginia are all but assured, and Democratic incumbents are facing uphill battles in Alaska, Arkansas and Louisiana, as well.", + "length": 168 + }, + { + "text": "The jump was driven largely by a Des Moines Register poll over the weekend that put Republican Joni Ernst 7 percentage points ahead of Democrat Bruce Braley in a Senate race seen as a must-win for Democrats.", + "length": 207 + }, + { + "text": "Washington (CNN) -- Republicans' chances of winning control of the Senate in Tuesday's midterm elections are now up to 95%, according to CNN's Pivit, an analysis that combines experts' projections with political watchers' predictions on key races.", + "length": 247 + }, + { + "text": "Republicans are also hoping for wins in states like Colorado, Iowa, New Hampshire and North Carolina -- which would help solidify their chances of gaining a majority by providing insulation in case the GOP loses seats of its own in Georgia and Kansas.", + "length": 251 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.6336681246757507 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:25.822649683Z", + "first_section_created": "2025-12-23T09:36:25.823004997Z", + "last_section_published": "2025-12-23T09:36:25.823172104Z", + "all_results_received": "2025-12-23T09:36:25.887029975Z", + "output_generated": "2025-12-23T09:36:25.887188281Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + 
"section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:25.823004997Z", + "publish_time": "2025-12-23T09:36:25.823172104Z", + "first_worker_start": "2025-12-23T09:36:25.823739227Z", + "last_worker_end": "2025-12-23T09:36:25.886106Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:25.823684525Z", + "start_time": "2025-12-23T09:36:25.823740727Z", + "end_time": "2025-12-23T09:36:25.823774828Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:25.82386Z", + "start_time": "2025-12-23T09:36:25.823989Z", + "end_time": "2025-12-23T09:36:25.886106Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:25.823677225Z", + "start_time": "2025-12-23T09:36:25.823739227Z", + "end_time": "2025-12-23T09:36:25.823776529Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:25.823749927Z", + "start_time": "2025-12-23T09:36:25.82380723Z", + "end_time": "2025-12-23T09:36:25.823827231Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 
0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1600, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/00761957757cf9724517fec5bf17fcbda42e0716.json b/data/output/00761957757cf9724517fec5bf17fcbda42e0716.json new file mode 100644 index 0000000..869f2d5 --- /dev/null +++ b/data/output/00761957757cf9724517fec5bf17fcbda42e0716.json @@ -0,0 +1,234 @@ +{ + "file_name": "00761957757cf9724517fec5bf17fcbda42e0716.txt", + "total_words": 396, + "top_n_words": [ + { + "word": "and", + "count": 15 + }, + { + "word": "the", + "count": 14 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "her", + "count": 10 + }, + { + "word": "was", + "count": 9 + }, + { + "word": "year", + "count": 9 + }, + { + "word": "old", + "count": 8 + }, + { + "word": "he", + "count": 6 + }, + { + "word": "in", + "count": 6 + }, + { + "word": "jackson", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Laura Cox .", + "length": 11 + }, + { + "text": "com reported.", + "length": 13 + }, + { + "text": "Police say he was invited into Jackson's home, where the bodies were later found.", + "length": 81 + }, + { + "text": "Tragic: The smiling mother and daughter, Heather Jackson (right) and three-year-old Celina .", + "length": 92 + }, + { + "text": "Curtis Clinton is accused of strangling to death Heather Jackson and her two young children .", + "length": 93 + }, + { + "text": "He was arrested on Monday from the Bellevue Hospital, where he was receiving unspecified treatment.", + "length": 99 + }, + { + 
"text": "He is being held in the Erie County Jail, where he will stay until another court appearance next Tuesday.", + "length": 105 + }, + { + "text": "She was found dead from strangulation in a Fostoria mobile home and Clinton pleaded guilty two years later.", + "length": 107 + }, + { + "text": "All three died of strangulation, a coroner found, and police are investigating the identity of a murder weapon.", + "length": 111 + }, + { + "text": "Already facing charges of raping a 13-year-old girl, the registered sex offender is being held on $3million bond.", + "length": 113 + }, + { + "text": "She and her children had lived in the property for around a month, police said, and she had known Clinton for about five months.", + "length": 128 + }, + { + "text": "Jackson's lifeless body had been shoved between a mattress and a bed frame, with her children's hidden inside a closet, NorwalkRegister.", + "length": 136 + }, + { + "text": "At this stage officers say they believe Clinton acted without an accomplice, though they are continuing to investigate and conduct interviews.", + "length": 142 + }, + { + "text": "Concerned friends had asked officers to check on the young mother, having not heard from her all day, and at around 8pm police made the grisly discovery.", + "length": 153 + }, + { + "text": "A young mother and her two infant children were strangled to death by her ex-con friend, police revealed on Friday, as the friend was charged with their murder.", + "length": 160 + }, + { + "text": "23-year-old Heather Jackson's body was found brutally slain at her home in Ohio on Saturday, alongside her three-year-old daughter Celina and son, Wayne, aged 18 months.", + "length": 169 + }, + { + "text": "The man allegedly behind the killings, 41-year-old Curtis Clinton, was charged with three counts of aggravated murder, seven months after being released from a 13-year term for assault and involuntary manslaughter.", + "length": 214 + }, + { + "text": "23-year-old Heather 
Jackson's body was found brutally slain at her home in Ohio on Saturday, alongside her three-year-old daughter Celina (right) and son, Wayne, aged 18 months (left) It is not yet known what his motive was but police say he has a lengthy criminal history including the assault and manslaughter conviction, over the death of 18-year-old Misty Keckler in 1997.", + "length": 376 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8166037201881409 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:26.32334324Z", + "first_section_created": "2025-12-23T09:36:26.323659353Z", + "last_section_published": "2025-12-23T09:36:26.32384106Z", + "all_results_received": "2025-12-23T09:36:26.389230293Z", + "output_generated": "2025-12-23T09:36:26.3894082Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:26.323659353Z", + "publish_time": "2025-12-23T09:36:26.32384106Z", + "first_worker_start": "2025-12-23T09:36:26.324445084Z", + "last_worker_end": "2025-12-23T09:36:26.388178Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:26.324421983Z", + "start_time": "2025-12-23T09:36:26.324483186Z", + "end_time": "2025-12-23T09:36:26.324534488Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:26.324662Z", + "start_time": "2025-12-23T09:36:26.324803Z", + "end_time": "2025-12-23T09:36:26.388178Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:26.324431084Z", + "start_time": "2025-12-23T09:36:26.324497287Z", + "end_time": "2025-12-23T09:36:26.324560089Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + 
}, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:26.324391582Z", + "start_time": "2025-12-23T09:36:26.324445084Z", + "end_time": "2025-12-23T09:36:26.324462485Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2324, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/00764d9499baed37590be445138c643edf121dcb.json b/data/output/00764d9499baed37590be445138c643edf121dcb.json new file mode 100644 index 0000000..4d0d3a8 --- /dev/null +++ b/data/output/00764d9499baed37590be445138c643edf121dcb.json @@ -0,0 +1,386 @@ +{ + "file_name": "00764d9499baed37590be445138c643edf121dcb.txt", + "total_words": 820, + "top_n_words": [ + { + "word": "the", + "count": 44 + }, + { + "word": "of", + "count": 23 + }, + { + "word": "to", + "count": 23 + }, + { + "word": "and", + "count": 19 
+ }, + { + "word": "in", + "count": 19 + }, + { + "word": "that", + "count": 16 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "are", + "count": 13 + }, + { + "word": "our", + "count": 12 + }, + { + "word": "s", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "But it's over.", + "length": 14 + }, + { + "text": "Marco Rubio and Govs.", + "length": 21 + }, + { + "text": "And some of them won.", + "length": 21 + }, + { + "text": "And my jibes are in jest.", + "length": 25 + }, + { + "text": "You have jumped the shark.", + "length": 26 + }, + { + "text": "There are tons of reasons.", + "length": 26 + }, + { + "text": "They've had an amazing run.", + "length": 27 + }, + { + "text": "I'm sincerely not gloating.", + "length": 27 + }, + { + "text": "Nikki Haley and Bobby Jindal.", + "length": 29 + }, + { + "text": "So WASPs, you've had your great run.", + "length": 36 + }, + { + "text": ") This is a testament to our nation.", + "length": 36 + }, + { + "text": "We are by our very nature progressive.", + "length": 38 + }, + { + "text": "But there's no need to feel bad for WASPs.", + "length": 42 + }, + { + "text": "(Of course, he is the back up to Mark Sanchez.", + "length": 46 + }, + { + "text": "Well, it's kind of complicated (as these things tend to be).", + "length": 60 + }, + { + "text": "Every single president in our nation's history, except for John F.", + "length": 66 + }, + { + "text": "Minorities no longer just voted -- they became active in politics.", + "length": 66 + }, + { + "text": "(I know some of you are thinking: How is that different than today?", + "length": 67 + }, + { + "text": "Opinion: In Ohio, candidates are salesmen trying to close the deal .", + "length": 68 + }, + { + "text": "Kennedy -- a Catholic -- and Barack Obama, has been a white Protestant.", + "length": 71 + }, + { + "text": "WASPs had almost as many victories in a row as The Harlem Globetrotters.", + "length": 72 + }, + { + 
"text": "That means rich white men with money get to control the political system.", + "length": 73 + }, + { + "text": "There's little doubt that we will see more diverse presidential candidates.", + "length": 75 + }, + { + "text": "Opinion: Democrats and Republicans need a plan to keep American dream alive .", + "length": 77 + }, + { + "text": "They are well aware that if they don't, the GOP will go the way of the Whigs.", + "length": 77 + }, + { + "text": ") The opinions expressed in this commentary are solely those of Dean Obeidallah.", + "length": 80 + }, + { + "text": "Times are so bleak for WASPs that there's not a single one on the Supreme Court.", + "length": 80 + }, + { + "text": "Objectively, the delegates at this year's Democratic convention were far more diverse.", + "length": 86 + }, + { + "text": "(CNN) -- Attention white Anglo-Saxon Protestants: Your days of running things are over.", + "length": 87 + }, + { + "text": "Back in the days when our nation was founded, only white men who owned land could vote.", + "length": 87 + }, + { + "text": "It may take years, or even decades, to see change, but we always march forward, not back.", + "length": 89 + }, + { + "text": "And the religious restrictions were also lifted so that non-Protestants were able to vote.", + "length": 90 + }, + { + "text": "Look at this year's presidential tickets: A Mormon, an African-American, and two Catholics.", + "length": 91 + }, + { + "text": "Enfranchising voters of all backgrounds has led to the opening up of our democratic process.", + "length": 92 + }, + { + "text": "The Republican convention looked more like the early bird dinner crowd at The Cracker Barrel.", + "length": 93 + }, + { + "text": "And they didn't just show up at political meetings -- over time they sought elective offices.", + "length": 93 + }, + { + "text": "Even some of the keynote speakers at the Democratic and Republican conventions were not WASPs.", + "length": 94 + }, + { + "text": "Over time, some 
states abandoned the requirement of land ownership so that poor white men could vote.", + "length": 101 + }, + { + "text": "So, how did we get to where we are today where the white Protestant establishment seems to be losing power?", + "length": 107 + }, + { + "text": "But one important factor that has contributed to today's political landscape is changing voting rights laws.", + "length": 108 + }, + { + "text": "And we will likely see in the not too distant future a president who is Latino, Asian, Jewish, Sikh or Muslim.", + "length": 110 + }, + { + "text": "The GOP featured Italian-Irish Catholic Chris Christie and the Democrats tapped Latino-American, Julian Castro.", + "length": 111 + }, + { + "text": "The demographics of our nation are changing and, by 2042, minorities are expected to become the majority in the U.", + "length": 114 + }, + { + "text": "But what I'm happy about is that our two major political parties are increasingly reflecting the new face of America.", + "length": 117 + }, + { + "text": "However, in the GOP's defense, a party that is 90% white, they have started to slowly showcase minorities, such as Sen.", + "length": 119 + }, + { + "text": "(Although poll taxes, literacy laws and other measures were still employed in some states to disenfranchise black voters.", + "length": 121 + }, + { + "text": "Likewise, in Congress, the percentage of Protestants fell from 74 percent in 1961 to a slim majority of 55 percent today.", + "length": 121 + }, + { + "text": "And there is no doubt that another white Protestant will rise up one day against the growing odds and win the White House.", + "length": 122 + }, + { + "text": "Neither the current Speaker of the House (John Boehner: Catholic) nor the Senate majority leader (Harry Reid: Mormon) is WASP.", + "length": 126 + }, + { + "text": "(That screaming sound you might have heard was Michele Bachmann shrieking in horror at the idea of a Muslim-American president.", + "length": 127 + }, + { + "text": 
"Except for a handful of exceptions, for over 200 years the presidential nominees of both major political parties have been WASPs.", + "length": 129 + }, + { + "text": "With each success, they inspire even more minorities of every race, ethnicity and religion to become active in our political system.", + "length": 132 + }, + { + "text": "However, it wasn't until after 1870, when the 15th Amendment was ratified, that black American citizens were finally guaranteed the right to vote.", + "length": 146 + }, + { + "text": ") In our first presidential election in 1789, no women, no blacks, no poor white men, and in many states neither Catholics nor Jews, were permitted to vote.", + "length": 156 + }, + { + "text": "But until that day comes, you can console yourself knowing that a white Protestant male is one of the most exciting athletes in our nation today: Tim Tebow.", + "length": 156 + }, + { + "text": ") And it took all the way to 1920 -- more than 100 years after our first president was elected -- that women were finally given the right to vote with the passage of the 19th Amendment to our Constitution.", + "length": 205 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6052877902984619 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:26.824696724Z", + "first_section_created": "2025-12-23T09:36:26.825086439Z", + "last_section_published": "2025-12-23T09:36:26.82534605Z", + "all_results_received": "2025-12-23T09:36:26.890696081Z", + "output_generated": "2025-12-23T09:36:26.89091729Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:26.825086439Z", + "publish_time": "2025-12-23T09:36:26.82534605Z", + "first_worker_start": "2025-12-23T09:36:26.825794168Z", + "last_worker_end": "2025-12-23T09:36:26.889827Z", + "total_journey_time_ms": 64, + 
"worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:26.825745366Z", + "start_time": "2025-12-23T09:36:26.825816669Z", + "end_time": "2025-12-23T09:36:26.825902572Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:26.82603Z", + "start_time": "2025-12-23T09:36:26.826167Z", + "end_time": "2025-12-23T09:36:26.889827Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:26.825785967Z", + "start_time": "2025-12-23T09:36:26.825865171Z", + "end_time": "2025-12-23T09:36:26.825981475Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:26.825726465Z", + "start_time": "2025-12-23T09:36:26.825794168Z", + "end_time": "2025-12-23T09:36:26.825837469Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, 
+ "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4729, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/0076c94a296cbd799c3ae46f55072fac726c3a4a.json b/data/output/0076c94a296cbd799c3ae46f55072fac726c3a4a.json new file mode 100644 index 0000000..3e68e19 --- /dev/null +++ b/data/output/0076c94a296cbd799c3ae46f55072fac726c3a4a.json @@ -0,0 +1,210 @@ +{ + "file_name": "0076c94a296cbd799c3ae46f55072fac726c3a4a.txt", + "total_words": 227, + "top_n_words": [ + { + "word": "the", + "count": 17 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "s", + "count": 7 + }, + { + "word": "was", + "count": 7 + }, + { + "word": "detroit", + "count": 5 + }, + { + "word": "in", + "count": 5 + }, + { + "word": "officer", + "count": 5 + }, + { + "word": "said", + "count": 5 + }, + { + "word": "to", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "CNN's Dominique Dodley contributed to this report.", + "length": 50 + }, + { + "text": "The two inmates did not intervene or leave the cell, according to police.", + "length": 73 + }, + { + "text": "Tolliver, 63, was taken to the Detroit Receiving Hospital and later released.", + "length": 77 + }, + { + "text": "Pearson was facing sentencing on Monday on carjacking and armed robbery charges.", + "length": 80 + }, + { + "text": "Tolliver is a retired Detroit police officer who joined the Sheriff's Office in December.", + "length": 89 + }, + { + "text": "The arrest could add at least 11 more charges to the crimes he was already facing, Napoleon said.", + "length": 97 + }, + { + "text": "According to the sheriff's office, the 25-year old has a lengthy criminal record and was on parole.", + "length": 99 + }, + { + "text": "Pearson escaped from the rear of the building, carjacked a citizen and drove away in a Dodge minivan.", + 
"length": 101 + }, + { + "text": "The Dodge was recovered and the officer's uniform was found under a vehicle near Beaubien Street in Detroit.", + "length": 108 + }, + { + "text": "The inmate, Abraham Pearson, was spotted walking in a Detroit neighborhood Monday night, said Wayne County Sheriff Benny Napoleon.", + "length": 130 + }, + { + "text": "He then handcuffed Deputy Harrison Tolliver, and fled, also taking the officer's cell phone and radio, the Wayne County Sheriff's Office said.", + "length": 142 + }, + { + "text": "(CNN) -- A Detroit man, who stabbed an officer outside a courtroom and escaped wearing the officer's uniform, has been captured, authorities said.", + "length": 146 + }, + { + "text": "Authorities said Pearson attacked a deputy with a sharpened comb in front of two other inmates inside a holding cell at the Frank Murphy Hall Monday morning.", + "length": 157 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6768180727958679 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:27.326166317Z", + "first_section_created": "2025-12-23T09:36:27.32772808Z", + "last_section_published": "2025-12-23T09:36:27.327904387Z", + "all_results_received": "2025-12-23T09:36:27.390295999Z", + "output_generated": "2025-12-23T09:36:27.390423404Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:27.32772808Z", + "publish_time": "2025-12-23T09:36:27.327904387Z", + "first_worker_start": "2025-12-23T09:36:27.328353705Z", + "last_worker_end": "2025-12-23T09:36:27.389443Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:27.328312703Z", + "start_time": "2025-12-23T09:36:27.328365705Z", + "end_time": "2025-12-23T09:36:27.328388906Z", + "queue_wait_time_ms": 0, 
+ "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:27.328519Z", + "start_time": "2025-12-23T09:36:27.328695Z", + "end_time": "2025-12-23T09:36:27.389443Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:27.328333104Z", + "start_time": "2025-12-23T09:36:27.328391206Z", + "end_time": "2025-12-23T09:36:27.328431408Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:27.328313603Z", + "start_time": "2025-12-23T09:36:27.328353705Z", + "end_time": "2025-12-23T09:36:27.328367306Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1361, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git 
a/data/output/0076f01b59fbd43bb1f6fe3d00e70f30622befdb.json b/data/output/0076f01b59fbd43bb1f6fe3d00e70f30622befdb.json new file mode 100644 index 0000000..63ef784 --- /dev/null +++ b/data/output/0076f01b59fbd43bb1f6fe3d00e70f30622befdb.json @@ -0,0 +1,262 @@ +{ + "file_name": "0076f01b59fbd43bb1f6fe3d00e70f30622befdb.txt", + "total_words": 409, + "top_n_words": [ + { + "word": "the", + "count": 16 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "on", + "count": 10 + }, + { + "word": "to", + "count": 10 + }, + { + "word": "s", + "count": 9 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "more", + "count": 5 + }, + { + "word": "wirathu", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "N.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "\"It's intolerable for U.", + "length": 24 + }, + { + "text": "\"To us, you are just a whore.", + "length": 29 + }, + { + "text": "rapporteur a \"whore\" at a protest.", + "length": 34 + }, + { + "text": "Special Rapporteurs to be treated in this way.", + "length": 46 + }, + { + "text": "\" The comments drew a sharp response from the U.", + "length": 48 + }, + { + "text": "READ MORE: Aung San Suu Kyi's 'silence' on the Rohingya .", + "length": 57 + }, + { + "text": "She had spoken out about the crisis facing the country's 1.", + "length": 59 + }, + { + "text": "'s Special Rapporteur on Myanmar, at a public rally on Friday.", + "length": 62 + }, + { + "text": "She made reference to Wirathu's comments in a statement this week.", + "length": 66 + }, + { + "text": "Wirathu was jailed in 2003 for inciting anti-Muslim violence, but released in an amnesty nine years later.", + "length": 106 + }, + { + "text": "Ashin Wirathu, the leader of the far-right, anti-Muslim 969 movement, made the remarks about Yanghee Lee, the U.", + 
"length": 112 + }, + { + "text": "READ MORE: 'Caught between a hammer and an anvil' READ MORE: Rohingya \"not welcome\" READ MORE: Curfew imposed after deadly violence .", + "length": 133 + }, + { + "text": "Lee also criticized proposed law changes backed by the monks, including a bill restricting interfaith marriage and religious conversions.", + "length": 137 + }, + { + "text": "High Commissioner for Human Rights, Zeid Ra'ad Al Hussein, who described Wirathu's remarks as \"sexist,\" \"insulting\" and \"utterly unacceptable.", + "length": 142 + }, + { + "text": "\"Don't assume you are a respectable person, just because of your position,\" he said in the speech, footage of which was widely circulated on social media.", + "length": 154 + }, + { + "text": "Since an outbreak of communal violence between Buddhists and Muslims in 2012, more than 130,000 live in wretched displacement camps they are forbidden to leave.", + "length": 160 + }, + { + "text": "3 million-strong Rohingya Muslim minority, most of whom live under apartheid-like conditions in Rakhine state, with limited access to adequate healthcare and education.", + "length": 168 + }, + { + "text": "(CNN)The United Nations' top human rights official has called on Myanmar's leaders to \"unequivocally condemn\" an ultra-nationalist Buddhist monk who labeled a visiting U.", + "length": 170 + }, + { + "text": "\"During my visit, I was personally subjected to the kind of sexist intimidation that female human rights defenders experience when advocating on controversial issues,\" she said.", + "length": 177 + }, + { + "text": "\" Lee was on a 10-day visit reporting on the human rights situation in the predominantly Buddhist southeast Asian country, which is emerging from a half-century of military rule.", + "length": 178 + }, + { + "text": "Myanmar's Minister of Information and presidential spokesperson Ye Htut posted comments on his Facebook page indicating he would ask the Ministry of Religious Affairs to look into 
Wirathu's speech.", + "length": 198 + }, + { + "text": "\" \"I call on religious and political leaders in Myanmar to unequivocally condemn all forms of incitement to hatred, including this abhorrent public personal attack,\" he said in a statement released from Geneva Wednesday.", + "length": 220 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7270798087120056 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:27.828751256Z", + "first_section_created": "2025-12-23T09:36:27.830915143Z", + "last_section_published": "2025-12-23T09:36:27.831131451Z", + "all_results_received": "2025-12-23T09:36:27.901027166Z", + "output_generated": "2025-12-23T09:36:27.901181972Z", + "total_processing_time_ms": 72, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:27.830915143Z", + "publish_time": "2025-12-23T09:36:27.831131451Z", + "first_worker_start": "2025-12-23T09:36:27.831763377Z", + "last_worker_end": "2025-12-23T09:36:27.900101Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:27.831795078Z", + "start_time": "2025-12-23T09:36:27.831866481Z", + "end_time": "2025-12-23T09:36:27.831921283Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:27.831962Z", + "start_time": "2025-12-23T09:36:27.832102Z", + "end_time": "2025-12-23T09:36:27.900101Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:27.831735576Z", + "start_time": "2025-12-23T09:36:27.831806779Z", + "end_time": "2025-12-23T09:36:27.831872781Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": 
"2025-12-23T09:36:27.831709175Z", + "start_time": "2025-12-23T09:36:27.831763377Z", + "end_time": "2025-12-23T09:36:27.831786878Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2504, + "slowest_section_id": 0, + "slowest_section_time_ms": 69 + } +} diff --git a/data/output/00773877bc7719de3f57057eee2cc600a3d60a19.json b/data/output/00773877bc7719de3f57057eee2cc600a3d60a19.json new file mode 100644 index 0000000..651ab6c --- /dev/null +++ b/data/output/00773877bc7719de3f57057eee2cc600a3d60a19.json @@ -0,0 +1,576 @@ +{ + "file_name": "00773877bc7719de3f57057eee2cc600a3d60a19.txt", + "total_words": 1395, + "top_n_words": [ + { + "word": "was", + "count": 52 + }, + { + "word": "the", + "count": 47 + }, + { + "word": "to", + "count": 41 + }, + { + "word": "i", + "count": 39 + }, + { + "word": "and", + "count": 32 + }, + { + "word": "that", + 
"count": 26 + }, + { + "word": "he", + "count": 22 + }, + { + "word": "her", + "count": 22 + }, + { + "word": "had", + "count": 20 + }, + { + "word": "a", + "count": 19 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "‘ .", + "length": 5 + }, + { + "text": "I was .", + "length": 7 + }, + { + "text": "shocked.", + "length": 8 + }, + { + "text": "collapses.", + "length": 10 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "distressed.", + "length": 11 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "I became very .", + "length": 15 + }, + { + "text": "Kelly Strange .", + "length": 15 + }, + { + "text": "‘I couldn’t relax.", + "length": 22 + }, + { + "text": "12:25 EST, 29 May 2013 .", + "length": 24 + }, + { + "text": "08:10 EST, 29 May 2013 .", + "length": 24 + }, + { + "text": "They should have listened.", + "length": 26 + }, + { + "text": "I knew that wasn’t right.", + "length": 27 + }, + { + "text": "‘It all happened so fast.", + "length": 27 + }, + { + "text": "‘I want justice for Alfie.", + "length": 28 + }, + { + "text": "just 'lazy' and told to relax.", + "length": 30 + }, + { + "text": "He had never been lazy at all.", + "length": 30 + }, + { + "text": "‘I felt like I was going mad.", + "length": 31 + }, + { + "text": "Alarm bells were ringing and I .", + "length": 32 + }, + { + "text": "No mother should go through that.", + "length": 33 + }, + { + "text": "This has been addressed with her.", + "length": 33 + }, + { + "text": "'That is why he had been so still.", + "length": 34 + }, + { + "text": "The poor little mite wasn’t lazy.", + "length": 35 + }, + { + "text": "She was once again assured he was .", + "length": 35 + }, + { + "text": "'He was like a statue on the screen.", + "length": 36 + }, + { + "text": "concerns that her baby wasn't moving.", + "length": 37 + }, + { + "text": "Mrs Copland said: ‘It’s disgusting.", + "length": 39 + }, 
+ { + "text": "Alfie died just five hours after birth .", + "length": 40 + }, + { + "text": "I knew something was wrong with my baby.", + "length": 40 + }, + { + "text": "‘That’s when the sonographer turned .", + "length": 41 + }, + { + "text": "That is why I had not felt a movement or kick.", + "length": 46 + }, + { + "text": "started crying because I knew it wasn’t right.", + "length": 48 + }, + { + "text": "’ Mrs Copland, from Crowland in Lincolnshire, .", + "length": 49 + }, + { + "text": "‘I even started to think perhaps I was paranoid.", + "length": 50 + }, + { + "text": "He died just hours after birth several weeks later.", + "length": 51 + }, + { + "text": "She then set about seeking answers from the hospital.", + "length": 53 + }, + { + "text": "She said: ‘I broke down when they brought him to me.", + "length": 54 + }, + { + "text": "He couldn’t move because his limbs were fused solid.", + "length": 54 + }, + { + "text": "‘I kept asking why he wasn’t moving,' she recalled.", + "length": 55 + }, + { + "text": "’ The hospital has also issued the following statement.", + "length": 57 + }, + { + "text": "I wanted to know how they could have missed it for so long.", + "length": 59 + }, + { + "text": "Her son was buried next to the baby Lucy had lost at 17 weeks.", + "length": 62 + }, + { + "text": "‘I had repeatedly told them something was wrong with my baby.", + "length": 63 + }, + { + "text": "returned two weeks later for another scan where she again raised .", + "length": 66 + }, + { + "text": "around and told me not to worry because he was just a ‘lazy boy.", + "length": 66 + }, + { + "text": "Alfie was eventually diagnosed with fetal akinesia deformation sequence.", + "length": 72 + }, + { + "text": "It was inappropriate that the sonographer suggested your baby was \"lazy\".", + "length": 73 + }, + { + "text": "Her husband Ben (pictured) was forced to break the heartbreaking news to her .", + "length": 78 + }, + { + "text": "It wasn't until 26 weeks in 
that doctors spotted a serious congenital disorder .", + "length": 80 + }, + { + "text": "They kept saying my baby was just a \"lazy boy\", when in fact he was seriously ill.", + "length": 82 + }, + { + "text": "At her 20-week scan became distressed again when she noticed the baby wasn't moving.", + "length": 84 + }, + { + "text": "Alfie lived for five hours but passed away before his mother woke from the operation.", + "length": 85 + }, + { + "text": "Lucy is now looking for answers to the questions surrounding her pregnancy with Alfie .", + "length": 87 + }, + { + "text": "Lucy Copland, 27, was concerned when her son was not kicking or moving during pregnancy.", + "length": 88 + }, + { + "text": "As far as they were concerned he was just lazy and I was made to feel like a paranoid mum.", + "length": 90 + }, + { + "text": "‘I was utterly distraught and angry at the way I had been treated but my priority was my baby.", + "length": 96 + }, + { + "text": "She explained: ‘I’d suffered a previous loss at 17 weeks so the scan was to offer assurance.", + "length": 96 + }, + { + "text": "'The Trust will continue to offer its guidance to the family should they need any further advice.", + "length": 97 + }, + { + "text": "He was delivered via C-section and died just hours later and before Lucy had regained consciousness.", + "length": 100 + }, + { + "text": "'We have completed a full investigation followed by a meeting with the family to present our findings.", + "length": 102 + }, + { + "text": "She repeatedly raised her concerns but was reassured by a sonographer that the baby was 'just a lazy boy'.", + "length": 106 + }, + { + "text": "’ The letter went on : ‘Any comments made during the scan would have had the intention of reassurance.", + "length": 106 + }, + { + "text": "‘I refused because if there was a chance he could survive, no matter how disabled, then he deserved that.", + "length": 107 + }, + { + "text": "I knew from my other pregnancies it wasn’t normal for 
him not to move at all and I begged someone to listen.", + "length": 110 + }, + { + "text": "The condition causes the bones to fuse together and reduces lung function amongst other serious abnormalities.", + "length": 110 + }, + { + "text": "‘I kept telling them something was wrong because I hadn’t felt any kicks at all, but they wouldn’t listen.", + "length": 112 + }, + { + "text": "Lucy Copland, 27, was told by a sonographer that her baby did not move during scans simply because he was 'lazy'.", + "length": 113 + }, + { + "text": "But Lucy disputes this: ‘I begged them to listen to me from 20 weeks but it was easier to dismiss me as paranoid.", + "length": 115 + }, + { + "text": "She said doctors dismissed her concerns and made her feel 'paranoid' But Mrs Copland said she was denied more scans.", + "length": 116 + }, + { + "text": "I tried to dress him and his limbs were so rigid -  it was no wonder he had never moved or I had never felt a kick.", + "length": 116 + }, + { + "text": "’ Lucy was scanned 17 weeks into her pregnancy when she first raised concerns at Peterborough City Hospital last year.", + "length": 120 + }, + { + "text": "'They said I had a perfect little baby boy but I noticed he didn’t move at all during the scan and I thought it was odd.", + "length": 122 + }, + { + "text": "’ Lucy could not see movement when scans of baby Alfie (such as this one taken at 22 weeks) were carried out at hospital.", + "length": 123 + }, + { + "text": "‘When we finally found out what was wrong with Alfie I felt devastated and let down that nobody had listened to me earlier.", + "length": 125 + }, + { + "text": "‘I was an emotional wreck and kept begging for another scan to look for movement, but I was told it would be of no benefit.", + "length": 125 + }, + { + "text": "Her husband Ben, 27, cradled the newborn as he died and later had to break the heartbreaking news to his wife that he had died.", + "length": 127 + }, + { + "text": "Over the following three weeks 
and five days she called the maternity unit 22 times, expressing concern that her baby still had not moved.", + "length": 138 + }, + { + "text": "’ She was advised to terminate the pregnancy when it was confirmed her son had the genetic condition Fetal akinesia deformation sequence.", + "length": 139 + }, + { + "text": "She was sent to experts at Nottingham Hospital the following day and it was quickly confirmed her son was suffering with severe birth abnormalities.", + "length": 148 + }, + { + "text": "' Lucy knew from previous pregnancies (from left to right her children Ellie-Mae, Rhys and Tayla) that it wasn't normal for babies not to move or kick.", + "length": 151 + }, + { + "text": "It is not normal to get to 24 weeks pregnant and not feel your baby move,' said Ms Copland, who has three other children - Ellie-Mae,4, Rhys, 8, and Tayla, 6.", + "length": 158 + }, + { + "text": "' Finally she was called in for another scan at 26 weeks and an emergency referral was made when medics agreed it was unusual that the tot still wasn’t moving.", + "length": 161 + }, + { + "text": "Mrs Copland said: ‘I’d been assured all along he was perfect whenever I had raised concerns,  but in fact he was so poorly every joint in his body was fused.", + "length": 162 + }, + { + "text": "’ But tragically, just two weeks later, Mrs Copland s water’s went at 28 weeks and she was rushed into theatre for an emergency caesarian section on November 14th last year.", + "length": 177 + }, + { + "text": "'It is not appropriate to comment specifically about Lucy Copland’s experience, however the concerns of Mrs Copland were raised through the Trust’s formal complaints system.", + "length": 177 + }, + { + "text": "A grieving mother is taking legal action after hospital staff branded her unborn son ‘lazy’ - when he actually suffering from a rare genetic condition that had paralysed his limbs.", + "length": 184 + }, + { + "text": "’ In a letter to the family, the general manager of the hospital 
explained the baby’s condition might have been missed because it only became noticeable in the later stages of pregnancy.", + "length": 190 + }, + { + "text": "But the distraught mother-of-three refused to accept that theory and demanded further tests - which finally revealed  her unborn son had a rare muscular and skeletal disorder that meant he could not move his arms or legs.", + "length": 222 + }, + { + "text": "Chris Wilkinson, director of care quality and chief nurse at Peterborough and Stamford Hospitals NHS Foundation Trust, said: 'Firstly, the Trust would like to offer our most sincere condolences to the Copland family at this sad time.", + "length": 233 + }, + { + "text": "’ The letter from the general manager also apologised for this and said: ‘I apologise that you were made to feel that you were paranoid; this is unacceptable and the members of staff that you refer to in your letter have been seen … and their attitude and behaviour addressed.", + "length": 282 + }, + { + "text": "Peterborough City Hospital's general manager apologised for the blunder, saying that any comments made during Lucy's scan were intended to reassure her but that it was inappropriate that the sonographer suggested her baby was 'lazy' ‘I wanted to express what they had put me and my family through by ignoring my fears for so long.", + "length": 332 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.657766193151474 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:28.331906417Z", + "first_section_created": "2025-12-23T09:36:28.332253831Z", + "last_section_published": "2025-12-23T09:36:28.332665148Z", + "all_results_received": "2025-12-23T09:36:28.444493151Z", + "output_generated": "2025-12-23T09:36:28.444738061Z", + "total_processing_time_ms": 112, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 111, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": 
"2025-12-23T09:36:28.332253831Z", + "publish_time": "2025-12-23T09:36:28.33246614Z", + "first_worker_start": "2025-12-23T09:36:28.333020562Z", + "last_worker_end": "2025-12-23T09:36:28.43818Z", + "total_journey_time_ms": 105, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:28.332951059Z", + "start_time": "2025-12-23T09:36:28.333020562Z", + "end_time": "2025-12-23T09:36:28.333115266Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:28.333157Z", + "start_time": "2025-12-23T09:36:28.333298Z", + "end_time": "2025-12-23T09:36:28.43818Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 104 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:28.333102365Z", + "start_time": "2025-12-23T09:36:28.333200369Z", + "end_time": "2025-12-23T09:36:28.333311174Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:28.333001561Z", + "start_time": "2025-12-23T09:36:28.333053863Z", + "end_time": "2025-12-23T09:36:28.333106465Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:28.332513142Z", + "publish_time": "2025-12-23T09:36:28.332665148Z", + "first_worker_start": "2025-12-23T09:36:28.333056063Z", + "last_worker_end": "2025-12-23T09:36:28.443469Z", + "total_journey_time_ms": 110, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:28.333102765Z", + "start_time": "2025-12-23T09:36:28.333146367Z", + "end_time": "2025-12-23T09:36:28.333207269Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:28.333192Z", + "start_time": "2025-12-23T09:36:28.33333Z", + "end_time": 
"2025-12-23T09:36:28.443469Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 110 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:28.333008461Z", + "start_time": "2025-12-23T09:36:28.333056263Z", + "end_time": "2025-12-23T09:36:28.333126366Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:28.333001561Z", + "start_time": "2025-12-23T09:36:28.333056063Z", + "end_time": "2025-12-23T09:36:28.333080264Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 214, + "min_processing_ms": 104, + "max_processing_ms": 110, + "avg_processing_ms": 107, + "median_processing_ms": 110, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3923, + "slowest_section_id": 1, + "slowest_section_time_ms": 110 + } +} diff --git a/data/output/00774f201f294218361cf27b7e3cb341548c3a89.json b/data/output/00774f201f294218361cf27b7e3cb341548c3a89.json new file mode 100644 index 0000000..09c31c4 --- /dev/null +++ 
b/data/output/00774f201f294218361cf27b7e3cb341548c3a89.json @@ -0,0 +1,632 @@ +{ + "file_name": "00774f201f294218361cf27b7e3cb341548c3a89.txt", + "total_words": 1639, + "top_n_words": [ + { + "word": "the", + "count": 68 + }, + { + "word": "a", + "count": 50 + }, + { + "word": "and", + "count": 45 + }, + { + "word": "to", + "count": 42 + }, + { + "word": "in", + "count": 39 + }, + { + "word": "she", + "count": 39 + }, + { + "word": "dolls", + "count": 37 + }, + { + "word": "i", + "count": 37 + }, + { + "word": "her", + "count": 28 + }, + { + "word": "is", + "count": 26 + } + ], + "sorted_sentences": [ + { + "text": "'You can hear them.", + "length": 19 + }, + { + "text": "He is only a child.", + "length": 19 + }, + { + "text": "It's a real mystery.", + "length": 20 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "\" But what do you expect?", + "length": 25 + }, + { + "text": "'I realised it was Michael.", + "length": 27 + }, + { + "text": "They feel the covers go back.", + "length": 29 + }, + { + "text": "\" 'The response was terrifying.", + "length": 31 + }, + { + "text": "I have no idea how they do this.", + "length": 32 + }, + { + "text": "'Ashley came weeks after Michael.", + "length": 33 + }, + { + "text": "Iris and Pearl - Died in their 20s.", + "length": 35 + }, + { + "text": "'They love to gossip to one another.", + "length": 36 + }, + { + "text": "Trena - A witch who died in her 30s.", + "length": 36 + }, + { + "text": "His spirit sneaks into my sons' beds.", + "length": 37 + }, + { + "text": "'Cost depends on quality and history.", + "length": 37 + }, + { + "text": "Dolls are not bought they are adopted.", + "length": 38 + }, + { + "text": "turned them back, but it kept flipping.", + "length": 39 + }, + { + "text": "'I think he used his energy to do this.", + "length": 39 + }, + { + "text": "'We know who it is so they are not scared.", + "length": 42 + }, + { + "text": "I make my own clothes rather than buy new.", + 
"length": 42 + }, + { + "text": "Touchingly, the doll's hair is also wispy.", + "length": 42 + }, + { + "text": "'I was overwhelmed and had to put her away.", + "length": 43 + }, + { + "text": "Isabelle - A young girl who died of cancer.", + "length": 44 + }, + { + "text": "'I then got Yulia, 33, who died in a plane crash.", + "length": 49 + }, + { + "text": "' 'Michael is very active, he will twist his head.", + "length": 50 + }, + { + "text": "He is only a baby and he wanted to watch cartoons.", + "length": 50 + }, + { + "text": "And dolls are not considered as toys by collectors.", + "length": 51 + }, + { + "text": "'I heard a doorbell once, and I don't even have one.", + "length": 52 + }, + { + "text": "' Mystical - Died when she was 103 and is very wise.", + "length": 52 + }, + { + "text": "Michael - A baby who died from cot death in the 1980s.", + "length": 54 + }, + { + "text": "'She told me she was happy to have been adopted by me.", + "length": 54 + }, + { + "text": "Agatha - Possibly evil, as her kids are scared of her.", + "length": 54 + }, + { + "text": "Baby Blue - A baby who died when he was ten months old.", + "length": 55 + }, + { + "text": "'I shout out: \"Ashley, Ashley,\" and the keys come back.", + "length": 55 + }, + { + "text": "'Some relatives don't like them, they make them nervous.", + "length": 56 + }, + { + "text": "'I am happy to go without other things to buy the dolls.", + "length": 56 + }, + { + "text": "'People think I am mad, but I have heard the dolls talk.", + "length": 56 + }, + { + "text": "'He regularly hides my car keys, because of the accident.", + "length": 57 + }, + { + "text": "Yulia - A woman who was 33 when she died in a plane crash.", + "length": 59 + }, + { + "text": "My mum noticed this too, and his facial expression changes.", + "length": 59 + }, + { + "text": "Haunted dolls is run by a group of paranormal investigators.", + "length": 60 + }, + { + "text": "Dolls cost between £10 and £500 depending on their 
history.", + "length": 61 + }, + { + "text": "'I have also attempted to hear voices though my mobile phone.", + "length": 61 + }, + { + "text": "' But a week later, Katrin tried to 'speak' to the doll again.", + "length": 62 + }, + { + "text": "They sound like humans, having conversations in the background.", + "length": 63 + }, + { + "text": "'They just have so much energy, but I wouldn't be without them.", + "length": 63 + }, + { + "text": "Heather and Gretchen - Both died when they were seven-years-old.", + "length": 64 + }, + { + "text": "'She is quite cheeky, she teams up with Michael to turn on taps.", + "length": 64 + }, + { + "text": "Ashley- A child who died more than three decades ago in a car crash.", + "length": 68 + }, + { + "text": "'I know when Pearl is around, she smells of fresh washing and bread.", + "length": 68 + }, + { + "text": "I have adopted them like I would a child and I would not let them go.", + "length": 69 + }, + { + "text": "'You can buy a doll, get it in the post, and the spirit comes with it.", + "length": 70 + }, + { + "text": "' Katrin has even sacrificed her basic needs in order to fund her habit.", + "length": 72 + }, + { + "text": "'Haunted dolls are not possessed as such, the doll is the spirit's home.", + "length": 72 + }, + { + "text": "I put my phone on record when the dolls are in the room and listen back.", + "length": 72 + }, + { + "text": "'Baby Blue came next in autumn 2011, he died when he was ten months old.", + "length": 72 + }, + { + "text": "'Mum refuses to go into my room, where the majority of the dolls are kept.", + "length": 74 + }, + { + "text": "Karin now says that she sources her dolls from a number of different places.", + "length": 76 + }, + { + "text": "'I usually get the dolls from collectors who have websites dedicated to them.", + "length": 77 + }, + { + "text": "'Usually my sons are not scared by the dolls, they like them and read to them.", + "length": 78 + }, + { + "text": "'They have purchased the 
dolls from people who didn't know the doll was haunted.", + "length": 80 + }, + { + "text": "His mum passed over in the crash and now he doesn't want me going out in my car.", + "length": 80 + }, + { + "text": "'There was a small fire in my flat a few weeks ago and I know it was the spirits.", + "length": 81 + }, + { + "text": "It was the purchase of Michael which showed Miss Reedik the dolls could 'misbehave.", + "length": 83 + }, + { + "text": "The Facebook page of Haunted Dolls, a shop in Shrewsbury, Shropshire has 8292 likes.", + "length": 84 + }, + { + "text": "'I have had boyfriends and they don't mind them as they don't believe they are haunted.", + "length": 87 + }, + { + "text": "'She said it is too scary, and is afraid of the spirits but I couldn't leave them alone.", + "length": 88 + }, + { + "text": "'When she arrived I realised straight away she was not a doll, she was a person,' she said.", + "length": 91 + }, + { + "text": "' Karin says that the dolls are even starting to play tricks on her children as well as her.", + "length": 92 + }, + { + "text": "'Isabelle was a young girl who died of cancer, she was a bald child as a result of treatment.", + "length": 93 + }, + { + "text": "' Each of Karin's dolls have a story which can affect the way the doll's behave in the house.", + "length": 93 + }, + { + "text": "'But they are a bit scared of my latest doll, Agatha, as her body is soft and her head heavy.", + "length": 93 + }, + { + "text": "Karin uses specialist ghost-hunting equipment to chat to her dolls, including voice recorders.", + "length": 94 + }, + { + "text": "Katrin says that as soon as Mystical arrived she began to notice spooky goings-on in her home.", + "length": 94 + }, + { + "text": "Karin Reedik has purchased 12 haunted dolls which she claims hold the spirits of dead people .", + "length": 94 + }, + { + "text": "' Pearl (left) and Iris (right) died in their twenties and apparently love to gossip together .", + "length": 95 + }, + { + 
"text": "' Katrin has to hire a babysitter while she is out to prevent the dolls from causing mischief .", + "length": 95 + }, + { + "text": "I'm not sure what they talk about, like most young women they don't want to tell their old mum.", + "length": 95 + }, + { + "text": "'When Michael sees me playing with my children I often sense him crying out: \"Mummy, play with me.", + "length": 98 + }, + { + "text": "' She says she is happy to spend a fair amount on the dolls because you are buying into their history.", + "length": 102 + }, + { + "text": "'She told me, in the same process through the TV, that she had died when she was 103 and is very wise.", + "length": 102 + }, + { + "text": "You can't put a price on them, because they're so much more than dolls, they're links to another world.", + "length": 103 + }, + { + "text": "'The spirit is attached to the doll and sometimes leaves it in the same way humans go to work every day.", + "length": 104 + }, + { + "text": "The full-time mother-of-two says her nights are taken up with chatting to her charges about how they died.", + "length": 106 + }, + { + "text": "' After coming to terms with Mystical's power, Katrin decided she wanted more dolls and has now bought 12.", + "length": 106 + }, + { + "text": "Ashley who is said to have died more than three decades ago in a crash, regularly hides Katrin's car keys .", + "length": 107 + }, + { + "text": "' Karin admits that people do have a hard time believing her but she insists that she is telling the truth.", + "length": 107 + }, + { + "text": "' Katrin says that Michael (right) is the most active of her dolls and that Agatha (left) is possibly evil .", + "length": 108 + }, + { + "text": "' Her next four dolls were Heather and Gretchen, both seven-years-old and Iris and Pearl, who died in their 20s.", + "length": 112 + }, + { + "text": "A shop in Shrewsbury, Shropshire, sells the dolls in the UK and scores of people chat about the subject on forums.", + "length": 114 + }, + 
{ + "text": "The TV switched off and on, the lights flickered and I knew it was Mystical talking to me through the electricity.", + "length": 114 + }, + { + "text": "'That night I put her near the television I told her: \"If you're happy switch it off, if you are not happy leave it.", + "length": 116 + }, + { + "text": "' Despite having devoted much of her time and money to the dolls not everyone is as keen on the collection as Karin.", + "length": 116 + }, + { + "text": "Enthralled by this new discovery, in August 2010 Katrin ordered her first doll, Mystical, from the US for just £10.", + "length": 116 + }, + { + "text": "Katrin was first intrigued when she discovered through online shops and forums there were dolls with spiritual powers.", + "length": 118 + }, + { + "text": "One night Ashley revealed to me, through the use of a pendulum, that he died more than three decades ago in a car crash.", + "length": 120 + }, + { + "text": "' It is not just Michael who plays up his 'doll mum' Karin says she has witnissed other younger dolls becoming mischevious.", + "length": 123 + }, + { + "text": "Pictured: Yulia (left) was a woman who was 33 when she died in a plane crash and Trena (right) was a  witch who died in her 30s .", + "length": 130 + }, + { + "text": "But having the dolls around has not always been a smooth ride with Karin claiming that they were responsible for an accident in her home.", + "length": 137 + }, + { + "text": "' Kartin's last two dolls both arrived in May this year, Trena is a witch who died in her 30s and Agatha who is a portal for three spirits.", + "length": 139 + }, + { + "text": "' She does not like to leave the dolls alone, so her mum babysits if she has to go away over night but she tries to avoid them where possible.", + "length": 142 + }, + { + "text": "' Mystical was the first doll that Karin purchases and she said that she immediately knew she was a 'person' Each of Krin's dolls has a story.", + "length": 142 + }, + { + "text": "'I 
became interested in the spirit world in 2008, while living in Estonia, Northern Europe, because I felt the presence of a spirit,' she said.", + "length": 143 + }, + { + "text": "A woman has revealed how she has to hire a babysitter to look after her dolls – because she believes they are possessed by the ghosts of dead people.", + "length": 151 + }, + { + "text": "'This started my interest in the paranormal and when I moved to Glasgow in November 2008 I started researching spooky things and stumbled upon haunted dolls.", + "length": 157 + }, + { + "text": "The single mother to Christopher Robin, eight, and Karl, seven, says that she bought the dolls after developing a fascination with the supernatural while travelling.", + "length": 165 + }, + { + "text": "' 'Michael, the second doll I got in June 2011, is a baby who died from cot death in the 1980s and has since inhabited the body of a porcelain doll which I have now adopted into my care.", + "length": 186 + }, + { + "text": "Katrin Reedik, 33, from Glasgow, has spent thousands of pounds on what she claims are haunted dolls and believes they are possessed and if they are left alone they will cause mischief in her home.", + "length": 196 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5648704171180725 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:28.833417812Z", + "first_section_created": "2025-12-23T09:36:28.833743326Z", + "last_section_published": "2025-12-23T09:36:28.83409964Z", + "all_results_received": "2025-12-23T09:36:28.928617346Z", + "output_generated": "2025-12-23T09:36:28.928841955Z", + "total_processing_time_ms": 95, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 94, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:28.833743326Z", + "publish_time": "2025-12-23T09:36:28.834007336Z", + "first_worker_start": "2025-12-23T09:36:28.834633961Z", + 
"last_worker_end": "2025-12-23T09:36:28.909171Z", + "total_journey_time_ms": 75, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:28.834553558Z", + "start_time": "2025-12-23T09:36:28.834633961Z", + "end_time": "2025-12-23T09:36:28.834714465Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:28.834787Z", + "start_time": "2025-12-23T09:36:28.83495Z", + "end_time": "2025-12-23T09:36:28.909171Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 74 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:28.83459996Z", + "start_time": "2025-12-23T09:36:28.834675563Z", + "end_time": "2025-12-23T09:36:28.834876271Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:28.83460196Z", + "start_time": "2025-12-23T09:36:28.834666263Z", + "end_time": "2025-12-23T09:36:28.834709664Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:28.834037737Z", + "publish_time": "2025-12-23T09:36:28.83409964Z", + "first_worker_start": "2025-12-23T09:36:28.834704964Z", + "last_worker_end": "2025-12-23T09:36:28.92771Z", + "total_journey_time_ms": 93, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:28.834683663Z", + "start_time": "2025-12-23T09:36:28.834739766Z", + "end_time": "2025-12-23T09:36:28.834829869Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:28.834973Z", + "start_time": "2025-12-23T09:36:28.835111Z", + "end_time": "2025-12-23T09:36:28.92771Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 92 + }, + "topn": { + "worker_type": "topn", + "receive_time": 
"2025-12-23T09:36:28.834689264Z", + "start_time": "2025-12-23T09:36:28.834736966Z", + "end_time": "2025-12-23T09:36:28.834829269Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:28.83460206Z", + "start_time": "2025-12-23T09:36:28.834704964Z", + "end_time": "2025-12-23T09:36:28.834741066Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 166, + "min_processing_ms": 74, + "max_processing_ms": 92, + "avg_processing_ms": 83, + "median_processing_ms": 92, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4302, + "slowest_section_id": 1, + "slowest_section_time_ms": 93 + } +} diff --git a/data/output/00777365b5dc6463a07739712b86fb7aa3ac5afa.json b/data/output/00777365b5dc6463a07739712b86fb7aa3ac5afa.json new file mode 100644 index 0000000..137b6c3 --- /dev/null +++ b/data/output/00777365b5dc6463a07739712b86fb7aa3ac5afa.json @@ -0,0 +1,210 @@ +{ + "file_name": "00777365b5dc6463a07739712b86fb7aa3ac5afa.txt", + 
"total_words": 186, + "top_n_words": [ + { + "word": "the", + "count": 14 + }, + { + "word": "steffe", + "count": 8 + }, + { + "word": "goal", + "count": 5 + }, + { + "word": "his", + "count": 5 + }, + { + "word": "milan", + "count": 5 + }, + { + "word": "of", + "count": 5 + }, + { + "word": "in", + "count": 4 + }, + { + "word": "inter", + "count": 4 + }, + { + "word": "on", + "count": 4 + }, + { + "word": "s", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Follow @@Anthony_Hay .", + "length": 22 + }, + { + "text": "Anthony Hay for MailOnline .", + "length": 28 + }, + { + "text": "Steffe spent the entire 2013/14 season on loan at AC Chievo Verona.", + "length": 67 + }, + { + "text": "VIDEO Scroll down to watch Inter midfielder Demetrio Steffe's goal against AC Milan .", + "length": 85 + }, + { + "text": "In the net: Steffe's effort landed in the net to the delight of the young midfielder .", + "length": 86 + }, + { + "text": "Cheeky chip: Steffe sends his lobbed effort over the head of the onrushing goalkeeper .", + "length": 88 + }, + { + "text": "Bearing down on goal: Demetrio Steffe was played through by one of his Inter Milan team-mates .", + "length": 96 + }, + { + "text": "He instead kept the ball in the air by taking two touches before lobbing the oncoming goalkeeper.", + "length": 97 + }, + { + "text": "Steffe raced onto a lobbed pass from his Inter Milan team-mate which allowed him a clear run on goal.", + "length": 101 + }, + { + "text": "However Steffe decided to be incredibly creative instead of simply controlling the ball before running through on goal.", + "length": 119 + }, + { + "text": "The 18-year-old will be hoping his outstanding goal will be brought to the attention of the club's senior manager Walter Mazzarri.", + "length": 130 + }, + { + "text": "Inter Milan Under 19 starlet Demetrio Steffe scored an incredibly skilful chip in his side's emphatic 6-0 win against rivals AC Milan.", + 
"length": 134 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.44315773248672485 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:29.335307623Z", + "first_section_created": "2025-12-23T09:36:29.337081095Z", + "last_section_published": "2025-12-23T09:36:29.337261702Z", + "all_results_received": "2025-12-23T09:36:29.402218618Z", + "output_generated": "2025-12-23T09:36:29.402338922Z", + "total_processing_time_ms": 67, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:29.337081095Z", + "publish_time": "2025-12-23T09:36:29.337261702Z", + "first_worker_start": "2025-12-23T09:36:29.337803724Z", + "last_worker_end": "2025-12-23T09:36:29.401266Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:29.337782023Z", + "start_time": "2025-12-23T09:36:29.337834825Z", + "end_time": "2025-12-23T09:36:29.337856326Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:29.337992Z", + "start_time": "2025-12-23T09:36:29.338125Z", + "end_time": "2025-12-23T09:36:29.401266Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:29.337763522Z", + "start_time": "2025-12-23T09:36:29.337835225Z", + "end_time": "2025-12-23T09:36:29.337877827Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:29.337745721Z", + "start_time": "2025-12-23T09:36:29.337803724Z", + "end_time": "2025-12-23T09:36:29.337848725Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + 
"total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1069, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/0077df2c882be13b6dd010e394831819556417b3.json b/data/output/0077df2c882be13b6dd010e394831819556417b3.json new file mode 100644 index 0000000..afe8701 --- /dev/null +++ b/data/output/0077df2c882be13b6dd010e394831819556417b3.json @@ -0,0 +1,314 @@ +{ + "file_name": "0077df2c882be13b6dd010e394831819556417b3.txt", + "total_words": 738, + "top_n_words": [ + { + "word": "the", + "count": 43 + }, + { + "word": "on", + "count": 20 + }, + { + "word": "to", + "count": 19 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "of", + "count": 15 + }, + { + "word": "and", + "count": 14 + }, + { + "word": "cosby", + "count": 13 + }, + { + "word": "in", + "count": 12 + }, + { + "word": "that", + "count": 11 + }, + { + "word": "you", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "co.", + "length": 3 
+ }, + { + "text": "dailymail.", + "length": 10 + }, + { + "text": "Firebrand U.", + "length": 12 + }, + { + "text": "Read more: http://www.", + "length": 22 + }, + { + "text": "' Scroll down for video .", + "length": 25 + }, + { + "text": "You said you wouldn't do it.", + "length": 28 + }, + { + "text": "You've just raped Bill Cosby.", + "length": 29 + }, + { + "text": "That's media rape right there.", + "length": 30 + }, + { + "text": "That's media rape right there.", + "length": 30 + }, + { + "text": "the AP had the integrity to not ask.", + "length": 36 + }, + { + "text": "Since when does your 'no' mean 'yes'?", + "length": 37 + }, + { + "text": "Do you know the definition of 'no,' sir?", + "length": 40 + }, + { + "text": "You just did it and then you blamed it on him.", + "length": 46 + }, + { + "text": "My gosh, maybe we should have a lesson on rape.", + "length": 47 + }, + { + "text": "But the AP concluded by saying: 'The interview was on record.", + "length": 61 + }, + { + "text": "html#ixzz3JhT5IreR Follow us: @MailOnline on Twitter | DailyMail on Facebook .", + "length": 78 + }, + { + "text": "'Tell your boss the reason why we didn't say that upfront was because we thought ...", + "length": 84 + }, + { + "text": "But he is later recorded asking the reporter to 'scuttle' that part of the interview.", + "length": 85 + }, + { + "text": "TV Land has also announced they will no longer air reruns of The Cosby Show on the network.", + "length": 91 + }, + { + "text": "The AP published the interview on Wednesday in which the comedian refuses to comment on the allegations.", + "length": 104 + }, + { + "text": "When asked to speak about these claims in an AP interview, Cosby said he had 'no response' and 'no comment.", + "length": 107 + }, + { + "text": "' The interview took place on November 6 following a string of allegations by women claiming he had drugged and raped them.", + "length": 123 + }, + { + "text": "The AP made no agreement to avoid questions about the 
allegations or to withhold publishing any of his comments at any time.", + "length": 124 + }, + { + "text": "Five women have come forward in the past month and claimed that they were drugged and raped by Cosby, four of them when they were teenagers.", + "length": 140 + }, + { + "text": "uk/news/article-2843575/Bill-Cosby-returns-stage-receives-standing-ovation-benefit-women-s-organization-new-actress-comes-forward-say-forced-oral-sex.", + "length": 150 + }, + { + "text": "One of the accusers is supermodel Janice Dickinson, who claims that during a dinner in 1982 the actor gave her a pill and when she woke up he was on top of her.", + "length": 160 + }, + { + "text": "' First response: On Wednesday, the AP published an interview in which a reporter asked Cosby to respond to allegations that he's raped more than a dozen women .", + "length": 161 + }, + { + "text": "Cosby has also had scheduled appearances on talk shows including The Queen Latifah Show and Late Night With David Letterman cancelled in the wake of these allegations.", + "length": 167 + }, + { + "text": "' Also coming forward recently is Carla Ferrigno, wife of Incredibly Hulk star Lou Ferrigno, who says Cosby tried to sexually assault her at a part at his house in 1967.", + "length": 169 + }, + { + "text": "' At the end of the interview, on November 6, Cosby tells the interviewer: 'We thought, because it was AP, that it wouldn't be necessary to go over that question with you.", + "length": 171 + }, + { + "text": "And today, Conservative loudmouth and former Fox News host Beck, 49, defended the comedian in a 15-minute rant on his eponymous radio show, declaring, 'You want to talk about rape?", + "length": 180 + }, + { + "text": "'Scuttled': Appearing alongside wife Camille, the comedian refused to comment on the allegations, but is later recorded asking the reporter to 'scuttle' that part of the interview .", + "length": 181 + }, + { + "text": "What's more, Cosby allegedly tried to use a friend to help 
court Ferrigno, and allegedly made his move on the former Playboy Bunny just moments after his own wife, Camille, left the room.", + "length": 187 + }, + { + "text": "Talking about rape: Conservative loudmouth and former Fox News host Beck, 49, defended the comedian in a 15-minute rant on his eponymous radio show, declaring, 'You want to talk about rape?", + "length": 189 + }, + { + "text": "talkshow host Glenn Beck has accused the Associated Press news agency of 'raping' Bill Crosby after it published an interview in which it asks him about claims he sexually assaulted more than a dozen women.", + "length": 206 + }, + { + "text": "The other four ladies; Barbara Bowman, Joan Tarshis, Tamara Green and Therese Serignese have all shared remarkably similar stories in which they claim to have shared a drink or a pill with Cosby and then woken up after or while they say he was sexually assaulting them.", + "length": 269 + }, + { + "text": "' These allegations are already having a major impact on his work however, with Netflix postponing the airing of his new comedy special, Bill Cosby 77, which was due to air the day after Thanksgiving, and NBC pulling the plug on a comedy project they were developing with the Cosby Show star.", + "length": 292 + }, + { + "text": "Outrage: Firebrand talkshow host Glenn Beck (left) accused the Associated Press of 'raping' Bill Crosby after it published an interview in which its reporter asks him about the sex-assault allegations engulfing him (right) Referring to the AP's question, he went on: 'You said you would not [ask] that.", + "length": 302 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7181962728500366 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:29.838047068Z", + "first_section_created": "2025-12-23T09:36:29.83835138Z", + "last_section_published": "2025-12-23T09:36:29.83859519Z", + "all_results_received": "2025-12-23T09:36:29.903121088Z", + "output_generated": 
"2025-12-23T09:36:29.903276195Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:29.83835138Z", + "publish_time": "2025-12-23T09:36:29.83859519Z", + "first_worker_start": "2025-12-23T09:36:29.839146812Z", + "last_worker_end": "2025-12-23T09:36:29.900296Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:29.839110211Z", + "start_time": "2025-12-23T09:36:29.839168513Z", + "end_time": "2025-12-23T09:36:29.839260517Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:29.839368Z", + "start_time": "2025-12-23T09:36:29.839514Z", + "end_time": "2025-12-23T09:36:29.900296Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:29.83908781Z", + "start_time": "2025-12-23T09:36:29.839156213Z", + "end_time": "2025-12-23T09:36:29.839254717Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:29.839069209Z", + "start_time": "2025-12-23T09:36:29.839146812Z", + "end_time": "2025-12-23T09:36:29.839187314Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, 
+ "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4205, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/007880654accdde3b5cdbf102d44104c9c62f3ea.json b/data/output/007880654accdde3b5cdbf102d44104c9c62f3ea.json new file mode 100644 index 0000000..2c3f230 --- /dev/null +++ b/data/output/007880654accdde3b5cdbf102d44104c9c62f3ea.json @@ -0,0 +1,500 @@ +{ + "file_name": "007880654accdde3b5cdbf102d44104c9c62f3ea.txt", + "total_words": 1317, + "top_n_words": [ + { + "word": "the", + "count": 73 + }, + { + "word": "of", + "count": 42 + }, + { + "word": "a", + "count": 36 + }, + { + "word": "to", + "count": 29 + }, + { + "word": "and", + "count": 28 + }, + { + "word": "in", + "count": 26 + }, + { + "word": "that", + "count": 24 + }, + { + "word": "it", + "count": 17 + }, + { + "word": "said", + "count": 16 + }, + { + "word": "we", + "count": 16 + } + ], + "sorted_sentences": [ + { + "text": "A.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "K.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "com.", + "length": 4 + }, + { + "text": "television.", + "length": 11 + }, + { + "text": "com, tformers.", + "length": 14 + }, + { + "text": "com and tfw2005.", + "length": 16 + }, + { + "text": "Names would change.", + "length": 19 + }, + { + "text": "\" Before long, Shout!", + "length": 21 + }, + { + "text": "\"The voices 
were awful.", + "length": 23 + }, + { + "text": "They don't mess around.", + "length": 23 + }, + { + "text": "\" Albert and others from Seibertron.", + "length": 36 + }, + { + "text": "I appreciate that they're staying cautious.", + "length": 43 + }, + { + "text": "com -- one of many fan sites like tfarchive.", + "length": 44 + }, + { + "text": "com podcaster Bob King of Ashley, Pennsylvania.", + "length": 47 + }, + { + "text": "\"We saw the releases that had come out in the U.", + "length": 48 + }, + { + "text": "com founder Ryan Yzquierdo, from Chicago, Illinois.", + "length": 51 + }, + { + "text": "\" Early fan reaction to this was not entirely positive.", + "length": 55 + }, + { + "text": "\" Ward pointed out other, more subtle culture differences.", + "length": 58 + }, + { + "text": "have with fans that this release has seen the light of day.", + "length": 59 + }, + { + "text": "series only touched on this concept briefly before it ended).", + "length": 61 + }, + { + "text": "\"Later in the series, another major character bites the dust.", + "length": 61 + }, + { + "text": "Soundwave disappears and is simply replaced by 'New Soundwave.", + "length": 62 + }, + { + "text": "\"It's interesting to watch those characters change culturally.", + "length": 62 + }, + { + "text": "\" There are some fans who simply do not want to watch subtitles.", + "length": 64 + }, + { + "text": "We gave it to a really good captioning and subtitling house in L.", + "length": 65 + }, + { + "text": "Such is the relationship that geek-friendly companies like Shout!", + "length": 65 + }, + { + "text": "The Transformers first became a pop cultural phenomenon in the U.", + "length": 65 + }, + { + "text": "From what I understand, Japanese children prefer having human drivers.", + "length": 70 + }, + { + "text": "\" As with any anime import, there is the eternal debate of \"subbed versus dubbed.", + "length": 81 + }, + { + "text": "\"This is the first time we've gotten a legitimate release of 
this Japanese series.", + "length": 82 + }, + { + "text": "\"When we heard them, it really got to a point where it was comical,\" Ward explaned.", + "length": 83 + }, + { + "text": "Factory releases of other series, such as \"G1\" and the 1990s \"Transformers: Beast Wars.", + "length": 87 + }, + { + "text": "These were things that were almost, for lack of a better word, lost to American audiences.", + "length": 90 + }, + { + "text": "Casual fans who just love Transformers will be really surprised by the quality of the show.", + "length": 91 + }, + { + "text": "\"It was clear that the folks who had translated did not use English as their first language.", + "length": 92 + }, + { + "text": "\"I'm really excited about it,\" said Michael Albert of Bear, Delaware, moderator of Seibertron.", + "length": 94 + }, + { + "text": "com -- said that these releases were one of the most sought-after items by fans over the years.", + "length": 95 + }, + { + "text": "in the mid-'80s, based on a pair of toy lines from Japan's Takara company, Microman and Diaclone.", + "length": 97 + }, + { + "text": "\" In the case of \"Headmasters\" however, the episodes are not dubbed, but have brand-new subtitles.", + "length": 98 + }, + { + "text": "Manning has communicated often with a \"vibrant\" community of fans on the Adult Swim message boards.", + "length": 99 + }, + { + "text": "\" \"Some major characters die early on,\" said Matt Brown of Canton, Michigan, a podcaster at Seibertron.", + "length": 103 + }, + { + "text": "\"After 'Headmasters' is over, the Transformers are not so much sentient robots but having human drivers.", + "length": 104 + }, + { + "text": "\"You can't actually sit there and watch the dubs, unless you like drinking while watching 'Transformers.", + "length": 104 + }, + { + "text": "\" \"The Autobots and Decepticons [in these series] are, no pun intended a well ordered machine,\" he said.", + "length": 104 + }, + { + "text": "(CNN) -- In January 2010, a hardcore 
\"Transformers\" fan going by \"gaastra\" on a message board for Shout!", + "length": 104 + }, + { + "text": "and they did a brilliant job of accurately translating the dialogue and understanding what was being said.", + "length": 106 + }, + { + "text": "We got those scripts approved by Hasbro (owners of 'Transformers'), and we're really happy with the outcome.", + "length": 108 + }, + { + "text": "\"We definitely look at what people are talking about online, what people are watching and buying online, too.", + "length": 109 + }, + { + "text": "The translation is about as close as one can get to an accurate translation of what's being said in Japanese.", + "length": 109 + }, + { + "text": "Optimus Prime will be referred in more of a formal manner, he'll be 'Commander' or something among those lines.", + "length": 111 + }, + { + "text": "\" \"[The Takara anime series] really bends towards a lot of things that Japanese children want to see,\" said Albert.", + "length": 115 + }, + { + "text": "and Australia and saw that the subtitles there weren't entirely accurate, and in some cases didn't make sense,\" said Ward.", + "length": 122 + }, + { + "text": "\"There was a small bit of disappointment that the project couldn't secure enough funding to do a brand-new English dub,\" said Seibertron.", + "length": 137 + }, + { + "text": "Factory DVD producer Brian Ward was asking fans how much interest they would have in such a release and what they would like to see on it.", + "length": 138 + }, + { + "text": "\"I think they're more likely to reach a more mainstream audience, and we're always hoping to get a larger audience excited about anime,\" she said.", + "length": 146 + }, + { + "text": "\" As for the importance of the release of \"Headmasters\" specifically, Ward said, \"It gives [fans] something definitive, something approved by Hasbro.", + "length": 149 + }, + { + "text": "\"Where the G1 characters would call Optimus Prime 'optimus' -- they were pretty casual with 
their leader -- the Japanese approach it very differently.", + "length": 150 + }, + { + "text": "\" Indeed, this is just the latest example of fans communicating directly with companies to make a difference in what material is released to the public.", + "length": 152 + }, + { + "text": "\" \"People who have not seen it before might be disappointed in the subtitles, but rest assured [that the existing dubs] are that horrible,\" said Seibertron.", + "length": 156 + }, + { + "text": "You would have to get recordings of them burned onto DVDs, or import them from Australia or Europe, or find a laser-disc player from the 1980s to watch this.", + "length": 157 + }, + { + "text": "18 months later, the first of the \"Takara shows,\" known as \"Transformers: Headmasters,\" was officially released Tuesday for the first time in the United States.", + "length": 160 + }, + { + "text": "Factory (a DVD and CD company \"for the discerning pop culture geek\") asked the simple question, \"What would it take to get the Takara shows a release in America?", + "length": 161 + }, + { + "text": "\" The other Takara series will be released in the near future as \"The Japanese Collection,\" though a production delay has postponed its release for several weeks.", + "length": 162 + }, + { + "text": "\"I think hearing it in your native language allows you to get more absorbed in the action, and to pay more attention to the animation, which is often just gorgeous.", + "length": 164 + }, + { + "text": "\"I kind of share that feeling, but I also know that this isn't going to be a very mainstream release, and for them to spend that much money would be kind of a gamble.", + "length": 166 + }, + { + "text": "From that, we're always looking for shows that we think will appeal to our audience, and then we pass it around the office -- several of us are anime fans, as well, so we make our own focus group.", + "length": 196 + }, + { + "text": "'\" Albert said that the Takara series' legacy extends beyond 
a mere curiosity: \"The themes are non-Western compared to what Hasbro does now but some of the design aesthetics do play into what we see today.", + "length": 205 + }, + { + "text": "Despite those delays and some early hesitation from the subtitle-phobic, Yzquierdo said that most fans are just excited to check the shows out: \"This is something that I never thought I would see released in the States.", + "length": 219 + }, + { + "text": "Kim Manning, head programmer for Adult Swim (which is owned by Time Warner, also owner of CNN), and one of the top people responsible for what anime series are seen in the United States, said that every effort is made to get dubbed versions.", + "length": 241 + }, + { + "text": "\"It really is interesting to see just how the Japanese and their culture played into a series that for most of us we've only known as straightforward Autobots vs Decepticons,\" said Ward, a \"Transformers\" fan himself who has produced all of the Shout!", + "length": 250 + }, + { + "text": "' It was almost the equivalent of watching a Saturday afternoon martial arts movie dub, and that's certainly not something we wanted to do with a property as beloved as 'Transformers,' so we opted out of original dubs and went for brand new subtitles.", + "length": 251 + }, + { + "text": "com, a \"Transformers\" community site that boasts as many as 300,000 page views per day (mostly by fans whose interest in \"Robots in Disguise\" goes well beyond one of the biggest box office successes of the year, \"Transformers: Dark of the Moon,\" and its predecessors).", + "length": 268 + }, + { + "text": "When interest in the characters faded after a very short-lived fourth season of the original \"G1\" animated series, it came full circle with the Japanese producing \"Headmasters,\" the first in a series of anime which took the characters in a wild new direction, involving new characters who were able to detach their heads, which were entirely separate characters (the U.", + 
"length": 369 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.6163009107112885 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:30.339387956Z", + "first_section_created": "2025-12-23T09:36:30.341675349Z", + "last_section_published": "2025-12-23T09:36:30.342236071Z", + "all_results_received": "2025-12-23T09:36:30.453088635Z", + "output_generated": "2025-12-23T09:36:30.453297243Z", + "total_processing_time_ms": 113, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 110, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:30.341675349Z", + "publish_time": "2025-12-23T09:36:30.341943559Z", + "first_worker_start": "2025-12-23T09:36:30.342305074Z", + "last_worker_end": "2025-12-23T09:36:30.450106Z", + "total_journey_time_ms": 108, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:30.342295974Z", + "start_time": "2025-12-23T09:36:30.342381577Z", + "end_time": "2025-12-23T09:36:30.342499882Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:30.342545Z", + "start_time": "2025-12-23T09:36:30.342686Z", + "end_time": "2025-12-23T09:36:30.450106Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 107 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:30.342229771Z", + "start_time": "2025-12-23T09:36:30.342306274Z", + "end_time": "2025-12-23T09:36:30.342429779Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:30.342230671Z", + "start_time": "2025-12-23T09:36:30.342305074Z", + "end_time": "2025-12-23T09:36:30.342354476Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:30.342050764Z", + 
"publish_time": "2025-12-23T09:36:30.342236071Z", + "first_worker_start": "2025-12-23T09:36:30.342393577Z", + "last_worker_end": "2025-12-23T09:36:30.43956Z", + "total_journey_time_ms": 97, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:30.342632687Z", + "start_time": "2025-12-23T09:36:30.342679089Z", + "end_time": "2025-12-23T09:36:30.342733891Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:30.34291Z", + "start_time": "2025-12-23T09:36:30.343042Z", + "end_time": "2025-12-23T09:36:30.43956Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 96 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:30.342555684Z", + "start_time": "2025-12-23T09:36:30.342617887Z", + "end_time": "2025-12-23T09:36:30.34271389Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:30.342347276Z", + "start_time": "2025-12-23T09:36:30.342393577Z", + "end_time": "2025-12-23T09:36:30.342430179Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 203, + "min_processing_ms": 96, + "max_processing_ms": 107, + "avg_processing_ms": 101, + "median_processing_ms": 107, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3842, + "slowest_section_id": 0, + "slowest_section_time_ms": 108 + } +} diff --git a/data/output/00788140c85a6d0c87937812055f597148a186d5.json b/data/output/00788140c85a6d0c87937812055f597148a186d5.json new file mode 100644 index 0000000..5065ea9 --- /dev/null +++ b/data/output/00788140c85a6d0c87937812055f597148a186d5.json @@ -0,0 +1,536 @@ +{ + "file_name": "00788140c85a6d0c87937812055f597148a186d5.txt", + "total_words": 1465, + "top_n_words": [ + { + "word": "the", + "count": 86 + }, + { + "word": "to", + "count": 43 + }, + { + "word": "of", + "count": 39 + }, + { + "word": "in", + "count": 37 + }, + { + "word": "and", + "count": 29 + }, + { + "word": "a", + "count": 25 + }, + { + "word": "england", + "count": 22 + }, + { + "word": "is", + "count": 20 + }, + { + "word": "s", + "count": 20 + }, + { + "word": "for", + "count": 18 + } + ], + "sorted_sentences": [ + { + "text": "02.", + "length": 3 + }, + { + "text": "lost 4-1.", + "length": 9 + }, + { + "text": "’ Quite.", + "length": 10 + }, + { + "text": "Just move on.", + "length": 13 + }, + { + "text": "Frustrated, James?", + "length": 18 + }, + { + "text": "Then join the club.", + "length": 19 + }, + { + "text": "Fred fed up with KP .", + "length": 21 + }, + { + "text": "The selectors can be bold.", + "length": 26 + }, + { + "text": "No, it was his frustration.", + "length": 27 + }, + { + "text": "Again, there is logic here.", + "length": 27 + }, + { + "text": "England too clever by half .", + "length": 28 + }, + { + "text": "THAT WAS THE WEEK THAT WAS .", + "length": 28 + }, + { + "text": "‘But he just has to let it go.", + "length": 32 + }, + 
{ + "text": "Thanks, in other words, a bunch.", + "length": 32 + }, + { + "text": "com by 5pm on Monday, December 15.", + "length": 35 + }, + { + "text": "They know what they’ll be getting.", + "length": 36 + }, + { + "text": "Michael Clarke, all-Australian hero .", + "length": 37 + }, + { + "text": "Anything else is just wishful thinking.", + "length": 39 + }, + { + "text": "So let’s hand over to Andrew Flintoff.", + "length": 40 + }, + { + "text": "The armband acted as an adrenaline shot.", + "length": 40 + }, + { + "text": "25 is better than England’s overall 1.", + "length": 40 + }, + { + "text": "WIN ALL EIGHT ISSUES OF THE NIGHTWATCHMAN!", + "length": 42 + }, + { + "text": "They said Joe Root was being messed around.", + "length": 43 + }, + { + "text": "You may not agree with it – and most do not.", + "length": 46 + }, + { + "text": "‘I like Kevin,’ said Flintoff on his podcast.", + "length": 49 + }, + { + "text": "Please email your answer to lawrencebooth@hotmail.", + "length": 50 + }, + { + "text": "Yes, a late change of plan could end up backfiring.", + "length": 51 + }, + { + "text": "But when it comes to the captain, they lose their nerve.", + "length": 56 + }, + { + "text": "On a turning track, Tredwell is in England’s best team.", + "length": 57 + }, + { + "text": "Victory there meant the decision attracted little scrutiny.", + "length": 59 + }, + { + "text": "They probably shouldn’t have been asked in the first place.", + "length": 61 + }, + { + "text": "They weren’t convinced by Moeen Ali at the top of the one-day order.", + "length": 70 + }, + { + "text": "But it must be exhausting sat in front of the television hating everyone.", + "length": 73 + }, + { + "text": "Critics said Gary Ballance should not have batted at No 3 in the Test side.", + "length": 75 + }, + { + "text": "But to leave him out of Sunday’s match in Colombo was too clever by half.", + "length": 75 + }, + { + "text": "No wonder Eoin Morgan sounded nonplussed in the post-match press 
conference.", + "length": 76 + }, + { + "text": "The best way of preparing for the World Cup is to start winning a few matches.", + "length": 78 + }, + { + "text": "Yet his 47-ball 62 on Sunday took his record as one-day leader to 427 runs at 71.", + "length": 81 + }, + { + "text": "To express surprise at the power of his hitting was to admit to being out of touch.", + "length": 83 + }, + { + "text": "- How many County Championship games in 2014 did Northamptonshire lose by an innings?", + "length": 85 + }, + { + "text": "But the thought process is not quite the finger-crossing exercise some have discerned.", + "length": 86 + }, + { + "text": "Built like a pole he may not be, but in that moment Taylor was nothing less than totemic.", + "length": 89 + }, + { + "text": "The problem England have is that the retention of Cook is not completely devoid of logic.", + "length": 89 + }, + { + "text": "England's squad are preparing for the fifth ODI against Sri Lanka in Pallekele on Wednesday .", + "length": 93 + }, + { + "text": "One argument has been that Eoin Morgan has been in no sort of form to replace Cook as captain.", + "length": 94 + }, + { + "text": "To point this out is not to be part of some conspiratorial nexus between the ECB and the press.", + "length": 95 + }, + { + "text": "James Taylor (right) is another player who made the most of Cook's ban, scoring 90 in Colombo .", + "length": 95 + }, + { + "text": "Michael Clarke walks back to the Adelaide Oval dressing room after retiring hurt against India .", + "length": 96 + }, + { + "text": "Instead, they put the needs of a struggling all-rounder ahead of the best interests of the side.", + "length": 96 + }, + { + "text": "For more information on how to make The Nightwatchman the perfect Christmas gift, CLICK HERE: .", + "length": 96 + }, + { + "text": "Kevin Pietersen continues to watch - and criticise - England following his sacking from the team .", + "length": 98 + }, + { + "text": "On February 14 in 
Melbourne, Australia's opening bowlers would rather be faced with Cook than Hales.", + "length": 100 + }, + { + "text": "In fact, England should consider one or two of the so-called risks they have already taken this year.", + "length": 101 + }, + { + "text": "Last week, you’ll be aware, Kevin Pietersen described Peter Moores on Twitter as a ‘woodpecker’.", + "length": 102 + }, + { + "text": "Alastair Cook (right) and Eoin Morgan are both viable options to lead England in one-day intrnationals .", + "length": 104 + }, + { + "text": "Cook returns to lead the team after serving a one-match ban for England's slow over-rate in the series .", + "length": 104 + }, + { + "text": "If a journalist dares to criticise Pietersen, it is apparently because he or she has got an axe to grind.", + "length": 105 + }, + { + "text": "The problem – and it has held English cricket back at various stages over the years – is conservatism.", + "length": 106 + }, + { + "text": "Morgan has thrived with the bat in Cook's absence and is presenting a case to be captain at the World Cup .", + "length": 107 + }, + { + "text": "James Tredwell has been left to bowl in the nets by England - but the spinner deserves a place in the team .", + "length": 108 + }, + { + "text": "It made some kind of sense for England to omit James Tredwell from the third one-day international in Hambantota.", + "length": 113 + }, + { + "text": "In 66 one-day internationals as captain, he averages nearly 40, with a strike-rate of 80; his win/loss ratio of 1.", + "length": 114 + }, + { + "text": "‘We know you don’t like Peter Moores, we know you got sacked and we know you don’t play for England any more.", + "length": 115 + }, + { + "text": "The boundaries had been brought in, there was rain in the air, and England knew what he brought to the perennial party.", + "length": 119 + }, + { + "text": "For more musings on England’s tour of Sri Lanka and other matters cricketing, please follow us on Twitter @the_topspin.", + 
"length": 121 + }, + { + "text": "The ECB top brass think it is too late to change, while promising to change should England flop in Australia and New Zealand.", + "length": 125 + }, + { + "text": "Yet in Colombo’s stultifying heat and humidity he did for England what he has been doing for some time for Nottinghamshire.", + "length": 125 + }, + { + "text": "And that ratio of five victories for every four defeats will not win you a World Cup – let alone get you beyond the quarter-finals.", + "length": 133 + }, + { + "text": "But the safety-first option – and that is what Cook, for all his qualities, represents – is less likely to take the rest of the world by surprise.", + "length": 150 + }, + { + "text": "For it took England until the fourth one-dayer of this series to stumble across the batting line-up most likely to unnerve opponents at the World Cup.", + "length": 150 + }, + { + "text": "Moores’s incessant coaching style, implied KP, was to blame for Eoin Morgan’s recent struggles (this was before Morgan made 62 in 47 balls in Colombo).", + "length": 155 + }, + { + "text": "We keep hearing that conditions in Australia and New Zealand are different from those in Sri Lanka, as if a change of scene will automatically benefit England.", + "length": 159 + }, + { + "text": "An on-song Morgan will be central to England’s chances at the World Cup, and so far the evidence suggests he’s more likely to be singing sweetly if he’s captain.", + "length": 167 + }, + { + "text": "England knew in advance that the Premadasa pitch would be slow and low, but were swayed by the desire to have another look at Ben Stokes, who was duly out for six and bowled two overs for 21.", + "length": 191 + }, + { + "text": "This columnist wrote a few months ago that Cook should have quit one-day cricket after the Test win against India, put his feet up, and prepared for 17 Tests in nine months starting in April.", + "length": 191 + }, + { + "text": "His frustration lay in his treatment by the 
selectors, who had previously granted him only two one-day internationals in Ireland and a couple of Tests against Dale Steyn, Morne Morkel and Vernon Philander.", + "length": 205 + }, + { + "text": "They admired the resilience Cook showed during the Test series against India, and think there is more chance of him repeating the trick with the one-day team than there is of new-look side lifting the trophy.", + "length": 208 + }, + { + "text": "But an average of 40 and a strike-rate of 80 are reasonable figures only when compared to the England one-day captains and openers who came before him; England’s fans want them to keep pace with a changing game.", + "length": 213 + }, + { + "text": "The Top Spin has got hold of a special bundle of the much-loved cricket quarterly – all eight issues of it – and is offering it as a prize to the first correct answer drawn at random to this uplifting question: .", + "length": 216 + }, + { + "text": "When Michael Clarke walked off the Adelaide Oval earlier on Tuesday, having retired hurt on 60 during the opening day of the delayed Test series against India because of his bad back, he was treated to a standing ovation.", + "length": 221 + }, + { + "text": "Instead, his own stubbornness (both a strength and a weakness) and the ECB’s fearfulness have recreated the mood that characterised the summer, when Cook’s leadership filled the back pages and blogs on an almost daily basis.", + "length": 228 + }, + { + "text": "When, on Monday morning, James Taylor reflected on his 90 the day before at the Premadasa, what lingered was not his answers to the inevitable questions about his height or about the patronising dismissal of his talents by one or two.", + "length": 234 + }, + { + "text": "One of the many touching aspects of Australia’s response to the death of Phil Hughes has been the way in which Clarke – previously regarded as a bit too metrosexual for some Australian tastes – seems finally to have been accepted, no questions asked.", + 
"length": 256 + }, + { + "text": "Alas, no sooner has something promising been unearthed than it will be put back in its box: Alastair Cook returns on Wednesday after serving his over-rate ban, and the selectors will be almost back to where they started, agonising over Alex Hales and trying to convince themselves that the captain really is the answer.", + "length": 319 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5071412026882172 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:30.843025037Z", + "first_section_created": "2025-12-23T09:36:30.843410953Z", + "last_section_published": "2025-12-23T09:36:30.843797669Z", + "all_results_received": "2025-12-23T09:36:30.946932722Z", + "output_generated": "2025-12-23T09:36:30.94714893Z", + "total_processing_time_ms": 104, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 103, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:30.843410953Z", + "publish_time": "2025-12-23T09:36:30.843633262Z", + "first_worker_start": "2025-12-23T09:36:30.844210385Z", + "last_worker_end": "2025-12-23T09:36:30.916817Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:30.844148583Z", + "start_time": "2025-12-23T09:36:30.844210385Z", + "end_time": "2025-12-23T09:36:30.844309589Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:30.844418Z", + "start_time": "2025-12-23T09:36:30.844659Z", + "end_time": "2025-12-23T09:36:30.916817Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:30.844278488Z", + "start_time": "2025-12-23T09:36:30.844349291Z", + "end_time": "2025-12-23T09:36:30.844455895Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 
0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:30.844255987Z", + "start_time": "2025-12-23T09:36:30.844318289Z", + "end_time": "2025-12-23T09:36:30.844378092Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:30.843695264Z", + "publish_time": "2025-12-23T09:36:30.843797669Z", + "first_worker_start": "2025-12-23T09:36:30.844308789Z", + "last_worker_end": "2025-12-23T09:36:30.946122Z", + "total_journey_time_ms": 102, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:30.84432639Z", + "start_time": "2025-12-23T09:36:30.844363491Z", + "end_time": "2025-12-23T09:36:30.844421194Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:30.844441Z", + "start_time": "2025-12-23T09:36:30.844592Z", + "end_time": "2025-12-23T09:36:30.946122Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 101 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:30.844312189Z", + "start_time": "2025-12-23T09:36:30.844371792Z", + "end_time": "2025-12-23T09:36:30.844468096Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:30.844245287Z", + "start_time": "2025-12-23T09:36:30.844308789Z", + "end_time": "2025-12-23T09:36:30.844478396Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 173, + "min_processing_ms": 72, + 
"max_processing_ms": 101, + "avg_processing_ms": 86, + "median_processing_ms": 101, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4059, + "slowest_section_id": 1, + "slowest_section_time_ms": 102 + } +} diff --git a/data/output/00790f55c293d0544e93bcba78bef3635e1f6a63.json b/data/output/00790f55c293d0544e93bcba78bef3635e1f6a63.json new file mode 100644 index 0000000..bf806ac --- /dev/null +++ b/data/output/00790f55c293d0544e93bcba78bef3635e1f6a63.json @@ -0,0 +1,316 @@ +{ + "file_name": "00790f55c293d0544e93bcba78bef3635e1f6a63.txt", + "total_words": 887, + "top_n_words": [ + { + "word": "the", + "count": 55 + }, + { + "word": "in", + "count": 23 + }, + { + "word": "of", + "count": 22 + }, + { + "word": "a", + "count": 21 + }, + { + "word": "s", + "count": 21 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "by", + "count": 11 + }, + { + "word": "at", + "count": 10 + }, + { + "word": "city", + "count": 10 + }, + { + "word": "with", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "\"We have everything in our hands.", + "length": 33 + }, + { + "text": "Ancelotti in troubled waters at the Bridge .", + "length": 44 + }, + { + "text": "Ferguson casts doubt over Hargreaves future .", + "length": 45 + }, + { + "text": "It's important to start to win the first trophies.", + "length": 50 + }, + { + "text": "\"Manchester City was a small team until three, four, five years ago.", + 
"length": 68 + }, + { + "text": "promoted home team into the danger area for the first time this season.", + "length": 71 + }, + { + "text": "The Italian urged his team not to get carried away, with the club still battling to qualify for next season's Champions League.", + "length": 127 + }, + { + "text": "\" United should have taken an early lead after dominating the first half-hour, with Dimitar Berbatov guilty of two bad misses in quick succession.", + "length": 146 + }, + { + "text": "City last won the tournament in 1969, and most recently reached the final in 1981 -- one of the modern classics which was won in a replay by Tottenham.", + "length": 151 + }, + { + "text": "In Saturday's Premier League action, third-placed defending champions Chelsea moved five points clear of City with a 3-1 victory at mid-table West Brom.", + "length": 152 + }, + { + "text": "Hart did well to tip a 65th-minute freekick from Nani onto the crossbar before Scholes' red card forced a reshuffle with Berbatov replaced by midfielder Anderson.", + "length": 162 + }, + { + "text": "West Brom had led through Nigeria striker peter Odemwingie, but Salomon Kalou put Chelsea ahead in the 26th minute and Frank Lampard sealed victory just before halftime.", + "length": 169 + }, + { + "text": "Everton consolidated seventh place with a 2-0 victory at home to Blackburn thanks to second-half goals by midfielder Leon Osman and defender Leighton Baines, the latter a penalty.", + "length": 179 + }, + { + "text": "United's bid to reach the final of the 140-year-old knockout competition for a record 19th time was then sabotaged by a moment of madness from 36-year-old midfielder Paul Scholes.", + "length": 179 + }, + { + "text": "Roberto Mancini's team will face either Bolton or Stoke in the final on May 14, with the two Premier League teams meeting in Sunday's second semifinal also at England's national stadium.", + "length": 186 + }, + { + "text": "The match ended in ugly scenes as United defender Rio 
Ferdinand had to be restrained after Anderson reacted to City striker Mario Balotelli's unwise celebrations in front of opposition fans.", + "length": 190 + }, + { + "text": "(CNN) -- Manchester United's hopes of winning a treble this season ended on Saturday with a 1-0 defeat to arch-rivals Manchester City in the semifinals of the English FA Cup at Wembley Stadium.", + "length": 193 + }, + { + "text": "Man of the match Toure was denied by Van der Sar in the final minute and City survived five minutes of time added on to reach the club's second final since winning the English League Cup in 1976.", + "length": 195 + }, + { + "text": "Arsenal can reduce United's lead to four points by beating sixth-placed Liverpool on Sunday in a match that Kenny Dalglish's team also need to win in order to beat Tottenham to the sole Europa League spot.", + "length": 205 + }, + { + "text": "The Ivory Coast midfielder pounced after consecutive errors by veteran goalkeeper Edwin van der Sar and midfielder Michael Carrick allowed him the chance to drill in a low shot seven minutes after halftime.", + "length": 206 + }, + { + "text": "Sunderland, European hopefuls earlier this season, slumped to an eighth defeat in nine games as the 2-0 loss at Birmingham left both clubs in a group of four teams on 38 points -- five clear of the relegation zone.", + "length": 214 + }, + { + "text": "The former England international, the only player of the two squads actually born in Manchester, was sent off with 18 minutes left for a reckless high lunge into the thigh of City's Argentine defender Pablo Zabaleta.", + "length": 216 + }, + { + "text": "Aston Villa moved up to ninth with a last-gasp 2-1 victory at second-bottom West Ham as substitute striker Gabriel Agbonlahor headed an injury-time winner after teammate Darren Bent canceled out Robbie Keane's early opener.", + "length": 223 + }, + { + "text": "The London club bounced back from the midweek Champions League exit at the hands of United as 
striker Didier Drogba leveled the scoring after this time being given the chance to start instead of $80 million signing Fernando Torres.", + "length": 231 + }, + { + "text": "The result eased any fears that Chelsea will not qualify for next season's top European competition, moving Carlo Ancelotti's team eight points clear of fifth-placed Tottenham -- who host United's main title rivals Arsenal on Wednesday.", + "length": 236 + }, + { + "text": "If we have the same spirit we had today, we have the fourth spot,\" he said, having molded City into contenders since taking charge December 2009 after being handed a lavish transfer kitty by billionaire owner Sheikh Mansour bin Zayed Al Nahyan.", + "length": 244 + }, + { + "text": "Alex Ferguson's team are on course for a record-breaking 19th English league title and have also reached the semifinals of the European Champions League, but Yaya Toure's second-half winner gave City the chance of winning a first trophy in 35 years.", + "length": 249 + }, + { + "text": "City, missing injured captain and top scorer Carlos Tevez, capitalized on some slack United defending after the interval as the 40-year-old Van der Sar -- who retires at the end of this season -- made a poor clearance and then Carrick gave the ball away to Toure.", + "length": 263 + }, + { + "text": "The Bulgarian, starting in place of suspended England striker Wayne Rooney, was first denied by quick-thinking goalkeeper Joe Hart and then belied the form that has seen him become the Premier League's top scorer this season by inexplicably scooping the ball over the bar from close range after being found by Nani's inviting low cross.", + "length": 336 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.4449227899312973 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:31.344434529Z", + "first_section_created": "2025-12-23T09:36:31.344820944Z", + "last_section_published": "2025-12-23T09:36:31.34546467Z", + 
"all_results_received": "2025-12-23T09:36:31.450797412Z", + "output_generated": "2025-12-23T09:36:31.45099802Z", + "total_processing_time_ms": 106, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 105, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:31.344820944Z", + "publish_time": "2025-12-23T09:36:31.345169358Z", + "first_worker_start": "2025-12-23T09:36:31.345749282Z", + "last_worker_end": "2025-12-23T09:36:31.449881Z", + "total_journey_time_ms": 105, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:31.34595719Z", + "start_time": "2025-12-23T09:36:31.346014692Z", + "end_time": "2025-12-23T09:36:31.346095896Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:31.346089Z", + "start_time": "2025-12-23T09:36:31.346218Z", + "end_time": "2025-12-23T09:36:31.449881Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 103 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:31.345851486Z", + "start_time": "2025-12-23T09:36:31.346150898Z", + "end_time": "2025-12-23T09:36:31.346270503Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:31.345645277Z", + "start_time": "2025-12-23T09:36:31.345749282Z", + "end_time": "2025-12-23T09:36:31.345818184Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:31.345234361Z", + "publish_time": "2025-12-23T09:36:31.34546467Z", + "first_worker_start": "2025-12-23T09:36:31.345919488Z", + "last_worker_end": "2025-12-23T09:36:31.394183Z", + "total_journey_time_ms": 48, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:31.345879587Z", + "start_time": 
"2025-12-23T09:36:31.345919488Z", + "end_time": "2025-12-23T09:36:31.345925189Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:31.346472Z", + "start_time": "2025-12-23T09:36:31.346588Z", + "end_time": "2025-12-23T09:36:31.394183Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 47 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:31.346102696Z", + "start_time": "2025-12-23T09:36:31.346188999Z", + "end_time": "2025-12-23T09:36:31.3461935Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:31.345920789Z", + "start_time": "2025-12-23T09:36:31.34595469Z", + "end_time": "2025-12-23T09:36:31.34595659Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 150, + "min_processing_ms": 47, + "max_processing_ms": 103, + "avg_processing_ms": 75, + "median_processing_ms": 103, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + 
"average_section_size": 2535, + "slowest_section_id": 0, + "slowest_section_time_ms": 105 + } +} diff --git a/data/output/00795ad78e907ccc39d66c327b1509b3cdf99f1c.json b/data/output/00795ad78e907ccc39d66c327b1509b3cdf99f1c.json new file mode 100644 index 0000000..7729045 --- /dev/null +++ b/data/output/00795ad78e907ccc39d66c327b1509b3cdf99f1c.json @@ -0,0 +1,472 @@ +{ + "file_name": "00795ad78e907ccc39d66c327b1509b3cdf99f1c.txt", + "total_words": 1209, + "top_n_words": [ + { + "word": "the", + "count": 61 + }, + { + "word": "a", + "count": 32 + }, + { + "word": "and", + "count": 29 + }, + { + "word": "in", + "count": 25 + }, + { + "word": "to", + "count": 24 + }, + { + "word": "norman", + "count": 23 + }, + { + "word": "of", + "count": 22 + }, + { + "word": "christian", + "count": 20 + }, + { + "word": "music", + "count": 20 + }, + { + "word": "was", + "count": 20 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "...", + "length": 3 + }, + { + "text": "...", + "length": 3 + }, + { + "text": "...", + "length": 3 + }, + { + "text": "2 all time.", + "length": 11 + }, + { + "text": "Norman hit the U.", + "length": 17 + }, + { + "text": "He's got the answer.", + "length": 20 + }, + { + "text": "\" \"He set the standard.", + "length": 23 + }, + { + "text": "This album is not for you.", + "length": 26 + }, + { + "text": "Why don't you look into Jesus?", + "length": 30 + }, + { + "text": "U2's \"The Joshua Tree\" was sixth.", + "length": 33 + }, + { + "text": "John Mellencamp said he's one, too.", + "length": 35 + }, + { + "text": ", Petula Clark and Tennessee Ernie Ford.", + "length": 40 + }, + { + "text": "He created the space for others to exist.", + "length": 41 + }, + { + "text": "\"My songs weren't written for Christians.", + "length": 41 + }, + { + "text": "humorous, poignant and always rock 'n' roll.", + "length": 44 + }, + { + "text": "music scene at a turbulent time for the nation.", + "length": 47 + }, + { + "text": "The 
accolades came much sooner in the mainstream media.", + "length": 55 + }, + { + "text": "could do it as well, if not better, than the rest of us.", + "length": 56 + }, + { + "text": "British pop star Cliff Richard made no secret of his admiration.", + "length": 64 + }, + { + "text": "\"I listened to his records growing up, and saw him perform many times.", + "length": 70 + }, + { + "text": "\" The documentary is scheduled to go into limited theatrical release in early 2010.", + "length": 83 + }, + { + "text": "The vision he created for where Christian rock music could go still resonates today.", + "length": 84 + }, + { + "text": "His respect for the arena of entertainment is what gave him his power as a performer.", + "length": 85 + }, + { + "text": "\"To take a chance on mentioning Jesus on a secular record was a pretty important step.", + "length": 86 + }, + { + "text": "But today is a different story, according to figures from the Gospel Music Association.", + "length": 87 + }, + { + "text": "\" Norman's fans include U2, Guns N' Roses and Bob Dylan, according to his brother Charles.", + "length": 90 + }, + { + "text": "I was saying, 'I'm going to present the Gospel, and I'm not going to say it like you want.", + "length": 90 + }, + { + "text": "Christian music has come a long way in the 40 years since Norman pioneered Christian rock.", + "length": 90 + }, + { + "text": "His first solo album, \"Upon This Rock,\" came out in late 1969, after he left the band People!", + "length": 93 + }, + { + "text": "Larry Norman sang about drugs, politics, racism, sex and Jesus -- sometimes in the same song.", + "length": 93 + }, + { + "text": "It says 56 million units of Christian/Gospel music sold in 2008, totaling nearly $500 million.", + "length": 94 + }, + { + "text": "Norman was inducted into the Gospel Music Hall of Fame that same year, along with Elvis Presley.", + "length": 96 + }, + { + "text": "Think of him as rock music's street preacher, often referred to as \"the 
father of Christian rock.", + "length": 97 + }, + { + "text": "\" There were no happy songs about going to heaven; the tunes tackled the social issues of the day.", + "length": 98 + }, + { + "text": "\"While Larry is always referenced by his Christian beliefs, to me he was always an entertainer ...", + "length": 98 + }, + { + "text": "\" Black Francis of the alternative rock group the Pixies said Norman has been a lifelong influence.", + "length": 99 + }, + { + "text": "Norman, a hippie musician who set out to sing about his faith, blazed the trail for Christian rock.", + "length": 99 + }, + { + "text": "A similar list released in 2001 by CCM, a Christian music and lifestyle magazine, put the album at No.", + "length": 102 + }, + { + "text": "About 45 percent of adults said they disliked it, with only about 5 percent saying it was their favorite.", + "length": 105 + }, + { + "text": "Then there were the lyrics, especially on Norman's second solo effort, 1972's \"Only Visiting This Planet.", + "length": 105 + }, + { + "text": "\"It was a pretty gutsy move to sing about Jesus on his first record,\" younger brother Charles Norman said.", + "length": 106 + }, + { + "text": "\"When I first became a Christian, I hunted around in vain to find Christian rock 'n' roll music I could relate to.", + "length": 114 + }, + { + "text": "\" Album sales for the genre, during 2008, outsold classical, jazz, new age and Latin, according to Nielsen SoundScan.", + "length": 117 + }, + { + "text": "Then he was introduced to Norman's music, \"and I just was overjoyed and thrilled, because suddenly I could relate ...", + "length": 117 + }, + { + "text": "'\" The album also contained what became Larry Norman's signature song, \"Why Should the Devil Have All the Good Music.", + "length": 117 + }, + { + "text": "\"They immediately have fans that are passionate and devout not only about them as a band or artist, but to their message.", + "length": 121 + }, + { + "text": "Compare that with 
today, when nearly two-thirds of those asked in a recent Pew Resource Center poll said they listen to it.", + "length": 123 + }, + { + "text": "Larry Norman was a Christian rock musician before the genre existed, combining faith with a backbeat and social consciousness.", + "length": 126 + }, + { + "text": "\" \" 'Upon This Rock' was written to stand outside the Christian culture,\" Larry Norman said in an interview with CCM magazine.", + "length": 126 + }, + { + "text": "The San Francisco Bay area psychedelic group was fresh off the success of a Top 20 hit with a cover of the Zombies' \"I Love You.", + "length": 128 + }, + { + "text": "' \" No small surprise, given that rock music at the time was the soundtrack of the counterculture and was far from the mainstream.", + "length": 130 + }, + { + "text": "As many Americans looked for answers, Norman offered his faith, a bold decision for an unknown solo artist making his major-label debut.", + "length": 136 + }, + { + "text": "Being a rocker in the late 1960s wasn't just flying in the face of a conservative Christian music industry but mainstream America as well.", + "length": 138 + }, + { + "text": "A national survey conducted by Louis Harris and Associates in 1966 found that rock 'n' roll was by far the most unpopular music in the country.", + "length": 143 + }, + { + "text": "\"Stuff like that shocked uptight Christians,\" Charles Norman said in an NPR interview shortly after his brother's death in February 2008 at age 60.", + "length": 147 + }, + { + "text": "It just all sounded horrible,\" Richard said on \"Rockspell,\" a Gospel-music-themed BBC television show he hosted in 1986, and on which Norman appeared.", + "length": 150 + }, + { + "text": "\"Before Larry Norman, there was not any Christian music industry,\" said Shawn McSpadden, manager for Switchfoot and the Grammy Award-winning band Third Day.", + "length": 156 + }, + { + "text": "In 1971, Billboard Magazine called Norman \"the most important writer 
since Paul Simon,\" while Time magazine pegged him as \"probably the top solo artist in the field.", + "length": 165 + }, + { + "text": "\" More than 300 versions of Norman's songs have been recorded by other artists, including non-Gospel acts as diverse as Richard and Francis, as well as Sammy Davis Jr.", + "length": 167 + }, + { + "text": "\" It was a time when college campuses were erupting in anti-Vietnam War protests and the nation was still trying to digest what had just happened that summer at Woodstock.", + "length": 171 + }, + { + "text": "\"One of his songs, it's called 'Why Don't You Look Into Jesus,' one of the lines is: 'You've got gonorrhea on Valentine's Day [VD] and you're still looking for the perfect lay.", + "length": 176 + }, + { + "text": "Norman sang about drugs, politics, racism, sex, venereal disease and Jesus -- sometimes in the same song -- getting his music banned from Christian bookstores that might have sold it.", + "length": 183 + }, + { + "text": "In fact, I used to dress up like him; long blond hair with bangs, sort of a grown-out British invasion look, with black jacket, black shirt, black pants and two-tone black and white cheerleader shoes,\" Francis said.", + "length": 215 + }, + { + "text": "ATLANTA, Georgia (CNN) -- Long before U2 and Bono blazed their own paths, and decades before the Christian music industry became a half-billion-dollar annual business, a hippie musician with long blond locks paved the way.", + "length": 222 + }, + { + "text": "\" Nearly two decades after its release, \"Only Visiting This Planet,\" helmed by Beatles' producer George Martin, was recognized as a seminal recording for the genre, voted by CCM magazine in 1990 as the greatest Christian album ever recorded.", + "length": 241 + }, + { + "text": "\"The Christian music industry has been very business-savvy, probably without even realizing it, in that a lot of the artists and bands use their local church as their home base when they begin their 
careers,\" said Bruce Burch of the University of Georgia's Music Business School.", + "length": 279 + }, + { + "text": "\" \"Between 1969 and 1979, Larry Norman was the Christian rock scene's answer to Bob Dylan, John Lennon and Mick Jagger,\" said Emmy-nominated director David Di Sabatino, who takes a critical look at Norman's career and life in his documentary \"Fallen Angel: The Outlaw Larry Norman.", + "length": 281 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4502952992916107 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:31.846266737Z", + "first_section_created": "2025-12-23T09:36:31.846629852Z", + "last_section_published": "2025-12-23T09:36:31.846990166Z", + "all_results_received": "2025-12-23T09:36:31.969909416Z", + "output_generated": "2025-12-23T09:36:31.970159226Z", + "total_processing_time_ms": 123, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 122, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:31.846629852Z", + "publish_time": "2025-12-23T09:36:31.846875662Z", + "first_worker_start": "2025-12-23T09:36:31.847378982Z", + "last_worker_end": "2025-12-23T09:36:31.930254Z", + "total_journey_time_ms": 83, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:31.847532788Z", + "start_time": "2025-12-23T09:36:31.847687394Z", + "end_time": "2025-12-23T09:36:31.847778698Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:31.847732Z", + "start_time": "2025-12-23T09:36:31.847866Z", + "end_time": "2025-12-23T09:36:31.930254Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 82 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:31.847428884Z", + "start_time": "2025-12-23T09:36:31.847501587Z", + "end_time": "2025-12-23T09:36:31.847631092Z", 
+ "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:31.847312979Z", + "start_time": "2025-12-23T09:36:31.847378982Z", + "end_time": "2025-12-23T09:36:31.847420983Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:31.846926964Z", + "publish_time": "2025-12-23T09:36:31.846990166Z", + "first_worker_start": "2025-12-23T09:36:31.847496887Z", + "last_worker_end": "2025-12-23T09:36:31.968978Z", + "total_journey_time_ms": 122, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:31.847559589Z", + "start_time": "2025-12-23T09:36:31.847600391Z", + "end_time": "2025-12-23T09:36:31.847634892Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:31.847757Z", + "start_time": "2025-12-23T09:36:31.847883Z", + "end_time": "2025-12-23T09:36:31.968978Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 121 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:31.847516487Z", + "start_time": "2025-12-23T09:36:31.847551189Z", + "end_time": "2025-12-23T09:36:31.847603191Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:31.847459485Z", + "start_time": "2025-12-23T09:36:31.847496887Z", + "end_time": "2025-12-23T09:36:31.847516387Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + 
"total_processing_ms": 203, + "min_processing_ms": 82, + "max_processing_ms": 121, + "avg_processing_ms": 101, + "median_processing_ms": 121, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3462, + "slowest_section_id": 1, + "slowest_section_time_ms": 122 + } +} diff --git a/data/output/0079922ef4bd7f24fe2dee228693dc54b034f467.json b/data/output/0079922ef4bd7f24fe2dee228693dc54b034f467.json new file mode 100644 index 0000000..ee4fb1f --- /dev/null +++ b/data/output/0079922ef4bd7f24fe2dee228693dc54b034f467.json @@ -0,0 +1,258 @@ +{ + "file_name": "0079922ef4bd7f24fe2dee228693dc54b034f467.txt", + "total_words": 421, + "top_n_words": [ + { + "word": "the", + "count": 33 + }, + { + "word": "a", + "count": 22 + }, + { + "word": "hostel", + "count": 13 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "on", + "count": 10 + }, + { + "word": "jumbo", + "count": 9 + }, + { + "word": "is", + "count": 8 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "was", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "Starting at just £34.", + "length": 22 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "32 per person to share a four bedroom dorm right up to £159.", + "length": 61 + }, + { + "text": "Stockholm Arlanda is the first facility in Jumbo Hostel chain.", + "length": 62 + }, + { + "text": "'Jumbo Hostel is the 
world's first hostel housed in a Boeing 747.", + "length": 65 + }, + { + "text": "33 for a private deluxe double bedroom with private shower facilities.", + "length": 70 + }, + { + "text": "Here you can spend the night on board a real jumbo jet - on the ground.", + "length": 71 + }, + { + "text": "But the price dramatically varies depending on the type of room customers opt for.", + "length": 82 + }, + { + "text": "A room with a view: This private double bedroom cockpit suite overlooks the runway .", + "length": 84 + }, + { + "text": "The Jumbo Hostel is the first to have ever been built on board a real Boeing 747 jet.", + "length": 85 + }, + { + "text": "A description on the hostel's website reads: 'Welcome to one of the world's coolest stays!", + "length": 90 + }, + { + "text": "It has been grounded since 2002 and is a decommissioned 747-200 model that was built in 1976.", + "length": 93 + }, + { + "text": "' Stockholm Arlanda, which is open 24 hours a day, is the first facility in Jumbo Hostel chain .", + "length": 96 + }, + { + "text": "High-flying on a budget: The Jumbo Hostel is the first to be built on board a real Boeing 747 jet .", + "length": 99 + }, + { + "text": "Out of service: The 747-200 plane was built in 1976 and grounded in 2002 after being decommissioned .", + "length": 102 + }, + { + "text": "Unique experience: Some of the original seating from when the Boeing was still in action remains on board .", + "length": 107 + }, + { + "text": "The hostel, complete with lounge and cafe, is situated on the runway at Stockholm Arlanda Airport in Sweden .", + "length": 109 + }, + { + "text": "A single suite: The plane also has single rooms with en-suite bathroom facilities costing around £137 a night .", + "length": 112 + }, + { + "text": "The innovative hostel was opened in January 2009 after the jumbo jet was given a complete overhaul and renovated.", + "length": 113 + }, + { + "text": "The hostel offers the usual facilities including a bed, flat screen 
TV, Wi-Fi, and breakfast from the lounge cafe.", + "length": 114 + }, + { + "text": "Flyers can also use a free shuttle to take them across the runway to catch their actual plane at the main airport.", + "length": 114 + }, + { + "text": "Nodding off on a plane isn't always the easiest thing to do but customers at this unique hostel won't have the same problem.", + "length": 124 + }, + { + "text": "Back up and running: The innovative hostel was opened in January 2009 after the jumbo jet was given a complete overhaul and renovated .", + "length": 136 + }, + { + "text": "'Night and day visitors will experience the unique feeling of staying inside a jumbo jet, retired and converted into a hostel and museum.", + "length": 137 + }, + { + "text": "It boasts 25 rooms including a luxury cockpit suite, showers and toilet facilities, as well as breathtaking views across the runway at Stockholm Arlanda Airport in Sweden.", + "length": 172 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.4112939238548279 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:32.347907738Z", + "first_section_created": "2025-12-23T09:36:32.34822795Z", + "last_section_published": "2025-12-23T09:36:32.348366656Z", + "all_results_received": "2025-12-23T09:36:32.414340413Z", + "output_generated": "2025-12-23T09:36:32.414500019Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:32.34822795Z", + "publish_time": "2025-12-23T09:36:32.348366656Z", + "first_worker_start": "2025-12-23T09:36:32.34895838Z", + "last_worker_end": "2025-12-23T09:36:32.413448Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:32.348978081Z", + "start_time": "2025-12-23T09:36:32.349037783Z", + "end_time": 
"2025-12-23T09:36:32.349078885Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:32.349269Z", + "start_time": "2025-12-23T09:36:32.349406Z", + "end_time": "2025-12-23T09:36:32.413448Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:32.34894848Z", + "start_time": "2025-12-23T09:36:32.348999682Z", + "end_time": "2025-12-23T09:36:32.349060084Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:32.348903178Z", + "start_time": "2025-12-23T09:36:32.34895838Z", + "end_time": "2025-12-23T09:36:32.349053284Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2364, + 
"slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/007999d996816e9be9e1ea8e9941e22fac1ceb67.json b/data/output/007999d996816e9be9e1ea8e9941e22fac1ceb67.json new file mode 100644 index 0000000..2b176a0 --- /dev/null +++ b/data/output/007999d996816e9be9e1ea8e9941e22fac1ceb67.json @@ -0,0 +1,480 @@ +{ + "file_name": "007999d996816e9be9e1ea8e9941e22fac1ceb67.txt", + "total_words": 1016, + "top_n_words": [ + { + "word": "i", + "count": 35 + }, + { + "word": "the", + "count": 35 + }, + { + "word": "and", + "count": 29 + }, + { + "word": "to", + "count": 29 + }, + { + "word": "of", + "count": 22 + }, + { + "word": "my", + "count": 17 + }, + { + "word": "was", + "count": 17 + }, + { + "word": "cancer", + "count": 16 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "cervical", + "count": 15 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "If .", + "length": 4 + }, + { + "text": "It .", + "length": 4 + }, + { + "text": "uk .", + "length": 4 + }, + { + "text": "org.", + "length": 4 + }, + { + "text": "In .", + "length": 4 + }, + { + "text": "jostrust.", + "length": 9 + }, + { + "text": "It usually occurs .", + "length": 19 + }, + { + "text": "cervix (neck of the womb).", + "length": 26 + }, + { + "text": "Anna Hodgekiss for MailOnline .", + "length": 31 + }, + { + "text": "swelling in the legs and kidneys.", + "length": 33 + }, + { + "text": "I’m so honoured and proud of her.", + "length": 35 + }, + { + "text": "It could, quite simply, save their life.", + "length": 40 + }, + { + "text": "wall - and this was pushing against her bladder.", + "length": 48 + }, + { + "text": "most cases, abnormal bleeding is the first sign.", + "length": 48 + }, + { + "text": "I just wanted to live to see my children grow up.", + "length": 49 + }, + { + "text": "' For more information and support visit http://www.", + "length": 52 + }, + { + "text": "She will find out next month if it has been successful.", + 
"length": 55 + }, + { + "text": "' In hindsight, she says there were other symptoms, too.", + "length": 56 + }, + { + "text": "is not a test for cervical cancer, but it identifies early .", + "length": 60 + }, + { + "text": "the cancer has spread there may be other symptoms including .", + "length": 61 + }, + { + "text": "She also had symptoms such as irregular periods and back pain.", + "length": 62 + }, + { + "text": "after sex although any unusual bleeding should be investigated.", + "length": 63 + }, + { + "text": "'Even in my darkest moments they all managed to brighten my day.", + "length": 64 + }, + { + "text": "'I’d never had any accidents before so it was a complete shock.", + "length": 65 + }, + { + "text": "'My treatment included internal radiotherapy called brachytherapy.", + "length": 66 + }, + { + "text": "'I really don’t think I could’ve done any of this without them.", + "length": 67 + }, + { + "text": "Ms White said: 'I was devastated - I have six children to care for.", + "length": 67 + }, + { + "text": "Within a week, tests revealed the mother-of-six had cervical cancer.", + "length": 68 + }, + { + "text": "About 2,900 cases of cervical cancer are diagnosed each year in the UK.", + "length": 71 + }, + { + "text": "abnormalities which, if left untreated, could develop into cancer of the .", + "length": 74 + }, + { + "text": "constipation, blood in the urine, loss of bladder control, bone pain and .", + "length": 74 + }, + { + "text": "'But as soon as I got home and saw my children again, I knew I had to fight.", + "length": 76 + }, + { + "text": "A scan revealed she had a tumour the size of an egg sitting on the cervical .", + "length": 77 + }, + { + "text": "'We all laughed and I joked that I couldn’t get married wearing a tena lady.", + "length": 78 + }, + { + "text": "'But my labour was different this time and my cervix didn’t dilate properly.", + "length": 78 + }, + { + "text": "She said: The treatment was really tiring and at times, I felt 
like giving up.", + "length": 78 + }, + { + "text": "'I want to encourage other women to not delay going to the doctors like I did.", + "length": 78 + }, + { + "text": "'I just kept thinking I was going to get better so delayed going to the doctors.", + "length": 80 + }, + { + "text": "'We urge all women who are eligible for cervical screening to attend when invited.", + "length": 82 + }, + { + "text": "' She said: 'I thought doctors would suggest a hysterectomy which I was happy with.", + "length": 83 + }, + { + "text": "After the cancer diagnosis, she underwent six weeks of radiotherapy and chemotherapy .", + "length": 86 + }, + { + "text": "Emma White, 36, was dancing along to the Wii programme when she lost control of her bladder .", + "length": 93 + }, + { + "text": "The NHS offers a free cervical screening test to all women aged 25-64 every three to five years.", + "length": 96 + }, + { + "text": "'I knew something was wrong when I wet myself, I’d never experienced incontinence in the past.", + "length": 96 + }, + { + "text": "' Despite that, I was up to date with all my smear tests and never dreamt this could happen to me.", + "length": 98 + }, + { + "text": "Ms White said: 'I want to thank my amazing family, children and friends from the bottom of my heart.", + "length": 100 + }, + { + "text": "The symptoms aren't always obvious and may not appear until the disease has reached an advanced stage.", + "length": 102 + }, + { + "text": "Ms White will find out next month whether the treatment has been successful and if she has the all-clear.", + "length": 105 + }, + { + "text": "'I’d also suffered from irregular periods, so after months of putting it off, I decided to go to my GP.", + "length": 105 + }, + { + "text": "Ms White (with partner Gary) said her children were instrumental in keeping her spirits up during treatment.", + "length": 108 + }, + { + "text": "'On my daughter’s birthday on Boxing Day it’s a tradition for our family to have a dance off on the 
Wii.", + "length": 108 + }, + { + "text": "'But they decided the best course of action for me was to undergo six weeks of chemotherapy and radiotherapy.", + "length": 109 + }, + { + "text": "Emma White, from Faversham, Kent, was dancing along to the Wii programme when she lost control of her bladder.", + "length": 110 + }, + { + "text": "A mother has credited her Wii console with saving her life after it alerted her to symptoms of cervical cancer.", + "length": 111 + }, + { + "text": "‘I was also suffering with lower back ache - which is also a sign of cervical cancer - even before I had Everlyn.", + "length": 115 + }, + { + "text": "'It took until November for me to realise that it had been over six months and my body still wasn’t back to normal.", + "length": 117 + }, + { + "text": "'Thankfully it took just six weeks of chemotherapy and radiotherapy before a scan showed my tumour had completely gone.", + "length": 119 + }, + { + "text": "'I look back now and think there’s a strong possibility I already had the cancer when I gave birth which terrifies me.", + "length": 120 + }, + { + "text": "“I also want to thank the incredible support I received from both the Macmillan nurses and Jo’s Cervical Cancer Trust.", + "length": 122 + }, + { + "text": "The 36-year-old was celebrating her daughter’s birthday on Boxing Day last year when the embarrassing incident happened.", + "length": 122 + }, + { + "text": "Other symptoms include pain in and around the vagina during sex, an unpleasant smelling discharge and pain when passing urine.", + "length": 126 + }, + { + "text": "The former nursery practitioner said: 'My partner Gary and I were over the moon when we found out I was pregnant with Everlyn.", + "length": 126 + }, + { + "text": "Ms White said she knew something was wrong when she exprienced loss of bladder control, as she had never suffered from it before.", + "length": 129 + }, + { + "text": "Her children, clockwise from top left, are: Jake, 14, Trevor, 17, 
Ella-Rose, 13,Ruby, 8, Everylyn-Ray, 17 months, and Bettsie, 11 .", + "length": 131 + }, + { + "text": "Concerned because she had never experienced incontinence before, Ms White went to see her GP, who found she was bleeding from her cervix .", + "length": 138 + }, + { + "text": "'Jo’s Cervical Cancer Trust offers information and support to women, their friends and family affected by cervical cancer and abnormalities.", + "length": 142 + }, + { + "text": "' Her children, Trevor, 17, Ella-Rose, 13, Bettsie, 11, Ruby, 8, Everylyn-Ray, 17 months and stepson Jake, 14, were all affected by the devastating news.", + "length": 153 + }, + { + "text": "' Ms White first blamed her tell-tale signs of cervical cancer - such as irregular periods - on recently having given birth to daughter Everlyn-Ray in March 2013.", + "length": 162 + }, + { + "text": "' Robert Music, Chief Executive of Jo’s Cervical Cancer Trust said: 'Every year 3,000 UK women are diagnosed with cervical cancer and 300,000 women will be told they have abnormal cells on the cervix that may require treatment.", + "length": 229 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6449065208435059 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:32.84909812Z", + "first_section_created": "2025-12-23T09:36:32.849474735Z", + "last_section_published": "2025-12-23T09:36:32.849923153Z", + "all_results_received": "2025-12-23T09:36:32.955586208Z", + "output_generated": "2025-12-23T09:36:32.956184132Z", + "total_processing_time_ms": 107, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 105, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:32.849474735Z", + "publish_time": "2025-12-23T09:36:32.849764247Z", + "first_worker_start": "2025-12-23T09:36:32.850296868Z", + "last_worker_end": "2025-12-23T09:36:32.933056Z", + "total_journey_time_ms": 83, + "worker_timings": { + "sentencesort": { 
+ "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:32.850410173Z", + "start_time": "2025-12-23T09:36:32.850985996Z", + "end_time": "2025-12-23T09:36:32.851111401Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:32.850485Z", + "start_time": "2025-12-23T09:36:32.850647Z", + "end_time": "2025-12-23T09:36:32.933056Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 82 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:32.850435574Z", + "start_time": "2025-12-23T09:36:32.850490476Z", + "end_time": "2025-12-23T09:36:32.850607281Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:32.850205265Z", + "start_time": "2025-12-23T09:36:32.850296868Z", + "end_time": "2025-12-23T09:36:32.85034047Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:32.849823449Z", + "publish_time": "2025-12-23T09:36:32.849923153Z", + "first_worker_start": "2025-12-23T09:36:32.850412273Z", + "last_worker_end": "2025-12-23T09:36:32.954632Z", + "total_journey_time_ms": 104, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:32.850408273Z", + "start_time": "2025-12-23T09:36:32.850444674Z", + "end_time": "2025-12-23T09:36:32.850463875Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:32.850807Z", + "start_time": "2025-12-23T09:36:32.850922Z", + "end_time": "2025-12-23T09:36:32.954632Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 103 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:32.850420073Z", + "start_time": "2025-12-23T09:36:32.850475976Z", + "end_time": "2025-12-23T09:36:32.850503577Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:32.850383972Z", + "start_time": "2025-12-23T09:36:32.850412273Z", + "end_time": "2025-12-23T09:36:32.850419873Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 185, + "min_processing_ms": 82, + "max_processing_ms": 103, + "avg_processing_ms": 92, + "median_processing_ms": 103, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2860, + "slowest_section_id": 1, + "slowest_section_time_ms": 104 + } +} diff --git a/data/output/0079a0208cc54a8b1057ee8bd7fef1a1f51eaf28.json b/data/output/0079a0208cc54a8b1057ee8bd7fef1a1f51eaf28.json new file mode 100644 index 0000000..329b916 --- /dev/null +++ b/data/output/0079a0208cc54a8b1057ee8bd7fef1a1f51eaf28.json @@ -0,0 +1,254 @@ +{ + "file_name": "0079a0208cc54a8b1057ee8bd7fef1a1f51eaf28.txt", + "total_words": 481, + "top_n_words": [ + { + "word": "the", + "count": 20 + }, + { + "word": "and", + "count": 19 + }, + { + "word": 
"a", + "count": 17 + }, + { + "word": "was", + "count": 16 + }, + { + "word": "it", + "count": 14 + }, + { + "word": "bear", + "count": 12 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "stafford", + "count": 8 + }, + { + "word": "by", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "m.", + "length": 2 + }, + { + "text": ", a Canadian company.", + "length": 21 + }, + { + "text": "It happened really quick.", + "length": 25 + }, + { + "text": "'There was just not enough time to get the bear spray out.", + "length": 58 + }, + { + "text": "' The hair-raising encounter reportedly occurred about 1:30 p.", + "length": 62 + }, + { + "text": "'We started walking uphill to get away from it and it started walking toward us.", + "length": 80 + }, + { + "text": "'I was wearing gloves and they were wet and it was confusing,' she reportedly added.", + "length": 84 + }, + { + "text": "She told the News-Miner she will need surgery for a broken bone in her hand, as well.", + "length": 85 + }, + { + "text": "It was the first fatal bear mauling in the park's 95-year history, officials have said.", + "length": 87 + }, + { + "text": "'I was worried I was going to die briefly, but it was fine once she let me go and ran away...", + "length": 93 + }, + { + "text": "'The bear sort of walked out of the fog and it had two cubs with it,' Stafford told the News-Miner.", + "length": 99 + }, + { + "text": "Sunday afternoon - just two days after another grizzly bear ate a hiker in Denali National Park and Preserve.", + "length": 109 + }, + { + "text": "'We stopped once we saw it was following us and tried to get the bear spray out but by then it was already running toward us.", + "length": 125 + }, + { + "text": "' Stafford, a geological engineering student, suffered cuts to her right hand and scratch marks on her back that required stitches.", + "length": 131 + }, + { + "text": "Richard White, 49, of San Diego was killed after he 
brazenly violated the quarter-mile berth that hikers at the park are required to give bears.", + "length": 144 + }, + { + "text": "Attacked: Julia Stafford was working for a Canadian mining company in Alaska when she - and a male colleague - were attacked by a grizzly bear .", + "length": 144 + }, + { + "text": "Dangerous Encounter: Stafford was reportedly dragged some 20 feet by the beast after it knocked her over - and then just as suddenly meandered away .", + "length": 149 + }, + { + "text": "It happened when Stafford and Kerry - his last name was not available - were collecting rock samples in the rain near a foggy ravine for Pure Nickel Inc.", + "length": 153 + }, + { + "text": "During that incident, officials said, White spent eight minutes taking photos of his killer from a range of about 50 yards before the bear turned and attacked.", + "length": 160 + }, + { + "text": "Julia Stafford, 20, a University of British Columbia student, smartly played dead after the bear knocked her - and a male colleague - over and on to the ground.", + "length": 160 + }, + { + "text": "The attack happened two days after a lone backpacker in Denali  was killed by another grizzly bear that has since been fatally shot by an Alaskan state trooper.", + "length": 161 + }, + { + "text": "A Seattle woman was attacked and dragged 20 feet by a grizzly bear while working for a Canadian mining company in Alaska - but survived with minor only injuries.", + "length": 161 + }, + { + "text": "Striking Twice: The attack on Stafford and her colleague came only two days after hiker Richard White of San Diego was eaten by a bear in Denali National Park and Preserve .", + "length": 173 + }, + { + "text": "'It bit my hand and kind of dragged me 20 feet over the rocks and just left me,' Stafford told The Fairbanks Daily News-Miner from her hospital bed while ironically holding - you guessed it - a teddy bear.", + "length": 205 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 
0.750957727432251 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:33.350691219Z", + "first_section_created": "2025-12-23T09:36:33.351032932Z", + "last_section_published": "2025-12-23T09:36:33.351193739Z", + "all_results_received": "2025-12-23T09:36:33.420696138Z", + "output_generated": "2025-12-23T09:36:33.420870845Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:33.351032932Z", + "publish_time": "2025-12-23T09:36:33.351193739Z", + "first_worker_start": "2025-12-23T09:36:33.351837065Z", + "last_worker_end": "2025-12-23T09:36:33.419715Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:33.351814564Z", + "start_time": "2025-12-23T09:36:33.351882767Z", + "end_time": "2025-12-23T09:36:33.351947569Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:33.352054Z", + "start_time": "2025-12-23T09:36:33.352208Z", + "end_time": "2025-12-23T09:36:33.419715Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:33.351803364Z", + "start_time": "2025-12-23T09:36:33.351873366Z", + "end_time": "2025-12-23T09:36:33.351948769Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:33.351785863Z", + "start_time": "2025-12-23T09:36:33.351837065Z", + "end_time": "2025-12-23T09:36:33.351859166Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + 
"max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2689, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/0079c51f12f563e5224ac9cb5125f5ee15f5747f.json b/data/output/0079c51f12f563e5224ac9cb5125f5ee15f5747f.json new file mode 100644 index 0000000..1169735 --- /dev/null +++ b/data/output/0079c51f12f563e5224ac9cb5125f5ee15f5747f.json @@ -0,0 +1,210 @@ +{ + "file_name": "0079c51f12f563e5224ac9cb5125f5ee15f5747f.txt", + "total_words": 339, + "top_n_words": [ + { + "word": "the", + "count": 17 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "i", + "count": 9 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "blog", + "count": 7 + }, + { + "word": "united", + "count": 7 + }, + { + "word": "on", + "count": 6 + }, + { + "word": "with", + "count": 6 + }, + { + "word": "as", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "I like it though!", + "length": 17 + }, + { + "text": "Fan focus: The Republik of Mancunia blog is popular with Manchester United 
supporters.", + "length": 86 + }, + { + "text": "\"I imagine if I wasn't in love with the club, I'd have given up on it long before now.", + "length": 86 + }, + { + "text": "Scott said running a blog was a very difficult and time-consuming task, though he enjoyed the interaction with other fans.", + "length": 122 + }, + { + "text": "\"I find it odd thinking of some lads thousands of miles away and hours apart sat at a computer and reading my latest rant.", + "length": 122 + }, + { + "text": "Authored by Manchester-born and raised 25-year-old Scott (who prefers to remain anonymous), the blog began in the 2005-2006 season.", + "length": 131 + }, + { + "text": "It takes up a lot of time and you get people who support other teams having a go at you on a daily basis, sometimes United fans and all!", + "length": 136 + }, + { + "text": "The Republik of Mancunia blog focuses on the Old Trafford club and is updated daily with a keen following among thousands of Manchester United fans.", + "length": 148 + }, + { + "text": "\"(In 2005-2006) I was also getting on soapbox about the fact we were not in decline, which the current media at the time seemed to think we were,\" he said.", + "length": 155 + }, + { + "text": "\"I love talking about United and I'm an argumentative guy, so getting to write my opinions down about the latest goings on is something I really enjoy doing.", + "length": 157 + }, + { + "text": "Scott, also known as \"Scott the Red,\" told CNN that before starting the blog he had been published on several football sites, and then \"fell into\" creating the Republik of Mancunia web site.", + "length": 190 + }, + { + "text": "\" Scott said the readership of his blog, and also contributions to his Manchester United forum came from areas as widely spread as Europe, Africa and even as far as Asia and South America -- with a strong base at home in the United Kingdom.", + "length": 240 + }, + { + "text": "LONDON, England (CNN) -- With Manchester United continuing their 
top form from last season and aiming for what would be a remarkable clean sweep of trophies this year, it's only appropriate that we should profile a blog somehow linked to the Red Devils.", + "length": 253 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.43075159192085266 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:33.851991306Z", + "first_section_created": "2025-12-23T09:36:33.852402022Z", + "last_section_published": "2025-12-23T09:36:33.85259543Z", + "all_results_received": "2025-12-23T09:36:33.915549165Z", + "output_generated": "2025-12-23T09:36:33.91567377Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:33.852402022Z", + "publish_time": "2025-12-23T09:36:33.85259543Z", + "first_worker_start": "2025-12-23T09:36:33.853134652Z", + "last_worker_end": "2025-12-23T09:36:33.914682Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:33.853225955Z", + "start_time": "2025-12-23T09:36:33.853299958Z", + "end_time": "2025-12-23T09:36:33.85333186Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:33.853478Z", + "start_time": "2025-12-23T09:36:33.853619Z", + "end_time": "2025-12-23T09:36:33.914682Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:33.853218955Z", + "start_time": "2025-12-23T09:36:33.853327959Z", + "end_time": "2025-12-23T09:36:33.853380162Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:33.853052748Z", + "start_time": "2025-12-23T09:36:33.853134652Z", + "end_time": 
"2025-12-23T09:36:33.853171753Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1855, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/0079f8448a071d3d4c61461296fa895524490e78.json b/data/output/0079f8448a071d3d4c61461296fa895524490e78.json new file mode 100644 index 0000000..4461030 --- /dev/null +++ b/data/output/0079f8448a071d3d4c61461296fa895524490e78.json @@ -0,0 +1,270 @@ +{ + "file_name": "0079f8448a071d3d4c61461296fa895524490e78.txt", + "total_words": 661, + "top_n_words": [ + { + "word": "in", + "count": 28 + }, + { + "word": "the", + "count": 26 + }, + { + "word": "to", + "count": 25 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "and", + "count": 19 + }, + { + "word": "al", + "count": 15 + }, + { + "word": "was", + "count": 14 + }, + { + "word": "a", + "count": 12 + }, + { + 
"word": "that", + "count": 11 + }, + { + "word": "berjawi", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "'That's when he emerged, so to speak,' said one.", + "length": 48 + }, + { + "text": "Bilal al-Berjawi was killed in a 2012 drone attack .", + "length": 52 + }, + { + "text": "By that point he was a key figure in al-Qaeda's East African operations.", + "length": 72 + }, + { + "text": "Bilal al-Berjawi was killed by a drone strike in Somalia three years ago.", + "length": 74 + }, + { + "text": "journalists James Foley and Steven Sotloff, Japanese reporter Kenji Goto and Syrian soldiers.", + "length": 93 + }, + { + "text": "He was also responsible for securing weapons and for overseeing the contingent of foreign fighters.", + "length": 99 + }, + { + "text": "The government stripped both men of their British passports and they were killed in separate drone attacks in 2012.", + "length": 115 + }, + { + "text": "A laptop found at the premises contained extremist material including encouragement of jihad and instructions on making car bombs.", + "length": 130 + }, + { + "text": "Originally from Lebanon, Berjawi first joined militants in Somalia in 2006 and then returned to Britain in 2007 on a fundraising venture.", + "length": 137 + }, + { + "text": "Lebanon born Bilal al-Berjawi (right) traveled freely between the UK and terror hubs in East Africa as he rose to prominence with al-Qaeda.", + "length": 140 + }, + { + "text": "Court documents relating to Mohammed Emwazi today showed that Emwazi was part of an extremist network linked to al-Shabaab, the BBC reported.", + "length": 141 + }, + { + "text": "Bilal al-Berjawi passed through UK Border Control at least five times between 2006 and 2009 as he travelled between London and African terror cells.", + "length": 148 + }, + { + "text": "Police stand guard near Jihadi John's last known residence in Queens 
Park, London, after his identity was confirmed as 27-year-old Mohammed Emwazi .", + "length": 149 + }, + { + "text": "MI5 files stated that his name was one of several terror suspects believed to be aiding the Somali terror cell in their provision of funds and equipment.", + "length": 153 + }, + { + "text": "Berjawi was said to be a senior figure with al-Qaeda in East Africa, a radical part of the al-Shabaab movement, and was known as one of its most active fighters.", + "length": 161 + }, + { + "text": "He is believed to have radicalised Mohammed Emwazi (left) - revealed earlier today as the masked ISIS executioner Jihadi John - on his return visits to London .", + "length": 161 + }, + { + "text": "Berjawi married a Somali woman in London, and in October 2009, they decided to slip out of the country again, without telling their families that they were leaving.", + "length": 164 + }, + { + "text": "Over that period he was rising to prominence as a senior member of al-Shabaab, the al-Qaeda linked group in Somalia - returning to the UK only to raise funds and to marry.", + "length": 171 + }, + { + "text": "They aroused the suspicions of the manager of the hotel at which they were staying in Mombasa and when they moved to Nairobi, police raided the premises and they were deported.", + "length": 176 + }, + { + "text": "All three were the subject of a manhunt, accused of crossing into Uganda to plot terrorist attacks that culminated in bomb attacks in Kampala in July 2011 that killed 74 people.", + "length": 177 + }, + { + "text": "Emwazi was today named as terror suspect 'Jihadi John', the black cloaked executioner who featured in the beheading videos of British aid workers Alan Henning and David Haines, U.", + "length": 179 + }, + { + "text": "According to people who have moved in jihadi circles in west London, Emwazi began to be noticed 'five or six years ago', when al-Berjawi was still flying between the UK and Africa.", + "length": 180 + }, + { + "text": "In an 
online obituary published in 2013, al-Shabaab said 'Abu Hafs' had been trained by two top military commanders of al-Qaeda in East Africa, Fazul Abdullah Mohammed and Saleh Ali Saleh Nabhan.", + "length": 195 + }, + { + "text": "The man at the centre of a network that influenced the ISIS executioner identified as 'Jihadi John' was allowed to fly in and out of London to terrorism hot spots unchecked for almost three years.", + "length": 196 + }, + { + "text": "He left again in February 2009 with his friend Mohammed Sakr, who is of Egyptian origin, to travel to Kenya, telling their families they were going on a safari - the same front used by Mohammed Emwazi when he flew to Tanzania in 2009.", + "length": 234 + }, + { + "text": "It is believed that it was during these return visits to London that al-Berjawi became the driving force behind the radicalisation of 27-year-old Mohammed Emwazi, using his growing stature within the terror group to radicalise homegrown extremists.", + "length": 248 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.580615758895874 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:34.353427498Z", + "first_section_created": "2025-12-23T09:36:34.354860056Z", + "last_section_published": "2025-12-23T09:36:34.355153867Z", + "all_results_received": "2025-12-23T09:36:34.418157205Z", + "output_generated": "2025-12-23T09:36:34.418305611Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:34.354860056Z", + "publish_time": "2025-12-23T09:36:34.355153867Z", + "first_worker_start": "2025-12-23T09:36:34.355830695Z", + "last_worker_end": "2025-12-23T09:36:34.415698Z", + "total_journey_time_ms": 60, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:34.355773792Z", + 
"start_time": "2025-12-23T09:36:34.355830695Z", + "end_time": "2025-12-23T09:36:34.355900298Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:34.356037Z", + "start_time": "2025-12-23T09:36:34.356199Z", + "end_time": "2025-12-23T09:36:34.415698Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 59 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:34.355788493Z", + "start_time": "2025-12-23T09:36:34.355879197Z", + "end_time": "2025-12-23T09:36:34.355986101Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:34.355824094Z", + "start_time": "2025-12-23T09:36:34.355895397Z", + "end_time": "2025-12-23T09:36:34.355926699Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 59, + "min_processing_ms": 59, + "max_processing_ms": 59, + "avg_processing_ms": 59, + "median_processing_ms": 59, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + 
"total_sections": 1, + "average_section_size": 3844, + "slowest_section_id": 0, + "slowest_section_time_ms": 60 + } +} diff --git a/data/output/007a0e7872f778312e787a0d56700a571b4b871a.json b/data/output/007a0e7872f778312e787a0d56700a571b4b871a.json new file mode 100644 index 0000000..8d52c83 --- /dev/null +++ b/data/output/007a0e7872f778312e787a0d56700a571b4b871a.json @@ -0,0 +1,254 @@ +{ + "file_name": "007a0e7872f778312e787a0d56700a571b4b871a.txt", + "total_words": 330, + "top_n_words": [ + { + "word": "the", + "count": 17 + }, + { + "word": "a", + "count": 15 + }, + { + "word": "she", + "count": 13 + }, + { + "word": "was", + "count": 12 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "her", + "count": 8 + }, + { + "word": "i", + "count": 8 + }, + { + "word": "on", + "count": 7 + }, + { + "word": "pritchard", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Stock image used .", + "length": 18 + }, + { + "text": "Corey Charlton for MailOnline .", + "length": 31 + }, + { + "text": "'I went to look and asked if she was okay.", + "length": 42 + }, + { + "text": "He said: 'I do not believe this was drowning.", + "length": 45 + }, + { + "text": "' Mrs Pritchard had four children and six grandchildren.", + "length": 56 + }, + { + "text": "I heard Beryl ask if she was okay when she was in the water.", + "length": 60 + }, + { + "text": "I told her to come out of the water and then she flipped over.", + "length": 62 + }, + { + "text": "She was a former ladies’ captain at Henlle Golf Club, near Gobowen.", + "length": 69 + }, + { + "text": "' She added: 'Mrs Pritchard was rushed to Serik Hospital where she died.", + "length": 72 + }, + { + "text": "Tourists swimming in the sea in the Belek region where Mrs Pritchard died.", + "length": 74 + }, + { + "text": "North Shropshire coroner John Ellery recorded a conclusion of accidental death.", + "length": 79 + }, + { + "text": 
"' He continued: 'She went into the water and the jetty was quite a long way out.", + "length": 80 + }, + { + "text": "' A post-mortem examination in Turkey had ruled Mrs Pritchard died from drowning.", + "length": 81 + }, + { + "text": "Sylvia Pritchard, a grandmother to six, died while on a golfing holiday in Turkey .", + "length": 83 + }, + { + "text": "But Mr Ellery overruled that based on a second post-mortem examination held in the UK.", + "length": 86 + }, + { + "text": "I believe the head injury came after hitting her head on something hard below the water.", + "length": 88 + }, + { + "text": "There was a bit of a swell and I think she has gone in and banged her head on something.", + "length": 88 + }, + { + "text": "'She surfaced, she took a stroke which was very weak and then she flipped over face down.", + "length": 89 + }, + { + "text": "Sylvia Pritchard, 68, was staying in the Belek region of Turkey when the accident occurred in May.", + "length": 98 + }, + { + "text": "' Mrs Pritchard's husband, a Vale of Llangollen Golf Club member, said: 'I never saw Sylvia go into the water.", + "length": 110 + }, + { + "text": "A grandmother holidaying in Turkey died from a brain bleed after she hit her head on a jetty while jumping into the sea.", + "length": 120 + }, + { + "text": "An inquest heard how the grandmother from Oswestry, Shropshire, was on a golfing holiday with her husband John and four friends.", + "length": 128 + }, + { + "text": "Her friend Beryl Aubrey, who was on holiday with her at the time, recalled: 'I watched her climb down the steps before dropping into the water.", + "length": 143 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4940955936908722 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:34.855948234Z", + "first_section_created": "2025-12-23T09:36:34.856400852Z", + "last_section_published": "2025-12-23T09:36:34.856563359Z", + "all_results_received": 
"2025-12-23T09:36:34.926445877Z", + "output_generated": "2025-12-23T09:36:34.926588582Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:34.856400852Z", + "publish_time": "2025-12-23T09:36:34.856563359Z", + "first_worker_start": "2025-12-23T09:36:34.857182684Z", + "last_worker_end": "2025-12-23T09:36:34.92557Z", + "total_journey_time_ms": 69, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:34.857136882Z", + "start_time": "2025-12-23T09:36:34.857182684Z", + "end_time": "2025-12-23T09:36:34.857218185Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:34.85734Z", + "start_time": "2025-12-23T09:36:34.857497Z", + "end_time": "2025-12-23T09:36:34.92557Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 68 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:34.857149882Z", + "start_time": "2025-12-23T09:36:34.857220385Z", + "end_time": "2025-12-23T09:36:34.857262587Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:34.857202684Z", + "start_time": "2025-12-23T09:36:34.857265587Z", + "end_time": "2025-12-23T09:36:34.857288388Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 68, + "min_processing_ms": 68, + "max_processing_ms": 68, + 
"avg_processing_ms": 68, + "median_processing_ms": 68, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1824, + "slowest_section_id": 0, + "slowest_section_time_ms": 69 + } +} diff --git a/data/output/007a3697265477abf9f8f467366a0303bec5a446.json b/data/output/007a3697265477abf9f8f467366a0303bec5a446.json new file mode 100644 index 0000000..f84c36c --- /dev/null +++ b/data/output/007a3697265477abf9f8f467366a0303bec5a446.json @@ -0,0 +1,298 @@ +{ + "file_name": "007a3697265477abf9f8f467366a0303bec5a446.txt", + "total_words": 599, + "top_n_words": [ + { + "word": "the", + "count": 33 + }, + { + "word": "and", + "count": 24 + }, + { + "word": "in", + "count": 20 + }, + { + "word": "of", + "count": 20 + }, + { + "word": "a", + "count": 16 + }, + { + "word": "to", + "count": 12 + }, + { + "word": "were", + "count": 9 + }, + { + "word": "s", + "count": 8 + }, + { + "word": "somalia", + "count": 8 + }, + { + "word": "by", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "m.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "\" The U.", + "length": 8 + }, + { + "text": "Ahmed Mohamud.", + "length": 14 + }, + { + "text": "headquarters in Mogadishu.", + "length": 26 + }, + { + "text": "\"The Turkish were our main target.", + "length": 34 + }, + { + "text": "A minivan packed with explosives went 
off around 5 p.", + "length": 53 + }, + { + "text": "Designated a foreign terrorist organization by the U.", + "length": 53 + }, + { + "text": "Half of the famine victims were children younger than 5.", + "length": 56 + }, + { + "text": "\"We are behind the martyrdom explosion,\" the group claimed via Twitter.", + "length": 71 + }, + { + "text": "Somali police and Turkish embassy guards, meanwhile, converged on the scene.", + "length": 76 + }, + { + "text": "Turkish embassy sources said that two of its staff members were among the wounded.", + "length": 82 + }, + { + "text": "In addition to its volatile security situation, Somalia has been plagued by famine.", + "length": 83 + }, + { + "text": "in the heart of Mogadishu, just a few meters from the Turkish diplomatic post, said police Col.", + "length": 95 + }, + { + "text": "Journalist Omar Nor reported from Somalia, and CNN's Greg Botelho wrote this story from Atlanta.", + "length": 96 + }, + { + "text": "and all members of the international community who are working for peace and stability in Somalia.", + "length": 98 + }, + { + "text": "Al-Shabaab -- a militant Islamist group with connections to al Qaeda -- claimed responsibility for the attack.", + "length": 110 + }, + { + "text": "Mangled buses and cars ended up in a disfigured heap, while the windows of numerous nearby apartments were shattered.", + "length": 117 + }, + { + "text": "When it was over, two Somali security guards, a university student and three attackers were dead, according to Mohamud.", + "length": 119 + }, + { + "text": "government reacted Saturday to \"the terrorist attack\" by pledging its solidarity with Turkey, \"the people of Somalia ...", + "length": 120 + }, + { + "text": "In one such attack that al-Shabaab took credit for, in June, at least 14 people died and 15 were wounded in an attack on U.", + "length": 123 + }, + { + "text": "\" He lauded Turks' \"tireless efforts\" over the past two years to help build new schools and hospitals, 
among other contributions.", + "length": 129 + }, + { + "text": "government in 2008, al-Shabaab has waged a war with Somali's government in an effort to implement a stricter form of Islamic law in the country.", + "length": 144 + }, + { + "text": "But the militants have persisted by maintaining control of large rural areas of southern and central Somalia and staging guerrilla-style attacks.", + "length": 145 + }, + { + "text": "The targeted member of Parliament, Sheikh Adan Mader, and other lawmakers were out of the car when the blast occurred and were unharmed, police said.", + "length": 149 + }, + { + "text": "\"I condemn this criminal act of terrorism and my government and security forces will do everything it can to catch those who planned and directed it,\" Hassan said.", + "length": 163 + }, + { + "text": "\" Saturday's bombing was the second major attack in Mogadishu in a few days: On Wednesday, at least one person died in the capital after a bomb hidden in a lawmaker's car blew up.", + "length": 179 + }, + { + "text": "Its forces were pushed out of Mogadishu in summer 2011 by Somali and other African forces, raising hopes of a return to relative security in a city after about 20 years of violence.", + "length": 181 + }, + { + "text": "Somali President Hassan Sheikh Mohamoud similarly blasted what he called \"an act of cowardly desperation by terrorists\" against one of his nation's \"most determined and dependable allies.", + "length": 187 + }, + { + "text": "Mogadishu, Somalia (CNN) -- Blood and body parts littered the ground outside Turkey's embassy in Somalia on Saturday afternoon, the grisly result of a blast that police said left six dead and nine wounded.", + "length": 205 + }, + { + "text": "A May report by the United Nations Food and Agriculture Organization and the USAID-funded Famine Early Warning Systems Network, found that 258,000 Somalis had died in the famine between October 2010 and April 2012.", + "length": 214 + }, + { + "text": "\" \"This 
cowardly act will not shake our commitment to continue working for the brighter, more democratic and prosperous future the people of Somalia deserve,\" State Department spokeswoman Jen Psaki said in a statement.", + "length": 218 + }, + { + "text": "\"We must continue to stand firm against those who seek to destroy this country and, with the brave support of our allies, we must double our efforts to deliver the peaceful future the Somali people so desperately want.", + "length": 218 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8807242512702942 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:35.356741432Z", + "first_section_created": "2025-12-23T09:36:35.357034644Z", + "last_section_published": "2025-12-23T09:36:35.35719385Z", + "all_results_received": "2025-12-23T09:36:35.422213973Z", + "output_generated": "2025-12-23T09:36:35.42240918Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:35.357034644Z", + "publish_time": "2025-12-23T09:36:35.35719385Z", + "first_worker_start": "2025-12-23T09:36:35.357719971Z", + "last_worker_end": "2025-12-23T09:36:35.421339Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:35.35769237Z", + "start_time": "2025-12-23T09:36:35.357756373Z", + "end_time": "2025-12-23T09:36:35.357821175Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:35.357896Z", + "start_time": "2025-12-23T09:36:35.358017Z", + "end_time": "2025-12-23T09:36:35.421339Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:35.357666469Z", + "start_time": 
"2025-12-23T09:36:35.357736372Z", + "end_time": "2025-12-23T09:36:35.357825176Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:35.357650969Z", + "start_time": "2025-12-23T09:36:35.357719971Z", + "end_time": "2025-12-23T09:36:35.357747872Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3600, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/007a8f8c0af4b027d9aaf3e7b7937e51e53bfea7.json b/data/output/007a8f8c0af4b027d9aaf3e7b7937e51e53bfea7.json new file mode 100644 index 0000000..cc87a60 --- /dev/null +++ b/data/output/007a8f8c0af4b027d9aaf3e7b7937e51e53bfea7.json @@ -0,0 +1,520 @@ +{ + "file_name": "007a8f8c0af4b027d9aaf3e7b7937e51e53bfea7.txt", + "total_words": 1136, + "top_n_words": [ + { + "word": 
"the", + "count": 81 + }, + { + "word": "to", + "count": 33 + }, + { + "word": "in", + "count": 29 + }, + { + "word": "a", + "count": 28 + }, + { + "word": "of", + "count": 28 + }, + { + "word": "his", + "count": 16 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "s", + "count": 13 + }, + { + "word": "that", + "count": 12 + }, + { + "word": "was", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "1.", + "length": 2 + }, + { + "text": "P.", + "length": 2 + }, + { + "text": "2.", + "length": 2 + }, + { + "text": "3.", + "length": 2 + }, + { + "text": "No.", + "length": 3 + }, + { + "text": "com .", + "length": 5 + }, + { + "text": "Gaspar and G.", + "length": 13 + }, + { + "text": "Apparently, Dr.", + "length": 15 + }, + { + "text": "Sound familiar?", + "length": 15 + }, + { + "text": "Captain Marvel Jr.", + "length": 18 + }, + { + "text": "Captain Marvel Jr.", + "length": 18 + }, + { + "text": "All rights reserved.", + "length": 20 + }, + { + "text": "But that wasn't all.", + "length": 20 + }, + { + "text": "In 1963, chemists P.", + "length": 20 + }, + { + "text": "It was perfect timing.", + "length": 22 + }, + { + "text": "saves the bad-hair day .", + "length": 24 + }, + { + "text": "A copy of Captain Marvel Jr.", + "length": 28 + }, + { + "text": "Plus, the admiration was mutual.", + "length": 32 + }, + { + "text": "Superman defeats the Ku Klux Klan .", + "length": 35 + }, + { + "text": "The KKK was a great fit for the role.", + "length": 37 + }, + { + "text": "Donald Duck's scientific breakthrough .", + "length": 39 + }, + { + "text": ", known as \"America's most famous boy hero.", + "length": 43 + }, + { + "text": "A Spider-Man villain keeps folks out of jail .", + "length": 46 + }, + { + "text": "\" Although it sounded like nonsense, it wasn't.", + "length": 47 + }, + { + "text": "\" For more mental_floss articles, visit mentalfloss.", + "length": 52 + }, + { + "text": "Donald Duck had beaten him to 
the punch by 22 years.", + "length": 52 + }, + { + "text": "Mental Floss: Musicians performing on Sesame Street .", + "length": 53 + }, + { + "text": "Mental Floss: 5 memorable moments in comic book censorship .", + "length": 60 + }, + { + "text": "Entire contents of this article copyright, Mental Floss LLC.", + "length": 60 + }, + { + "text": "Within two weeks of the broadcast, KKK recruitment was down.", + "length": 60 + }, + { + "text": "Mental Floss: Truth about lie detectors (and Wonder Woman) 4.", + "length": 61 + }, + { + "text": "Of course, Elvis never tried to hide his love for the Captain.", + "length": 62 + }, + { + "text": "In 1983, Goss produced his first batch of electronic monitors.", + "length": 62 + }, + { + "text": "\" A footnote revealed that \"literature\" as the Donald Duck comic.", + "length": 65 + }, + { + "text": "In the 1940s, \"The Adventures of Superman\" was a radio sensation.", + "length": 65 + }, + { + "text": "felt better knowing that the gadget had once nabbed Spider-Man, too.", + "length": 68 + }, + { + "text": "In the post-World War II era, the Klan experienced a huge resurgence.", + "length": 69 + }, + { + "text": "And by 1948, people were showing up to Klan rallies just to mock them.", + "length": 70 + }, + { + "text": "During his early teen years, Elvis was obsessed with Captain Marvel Jr.", + "length": 71 + }, + { + "text": "In a 1977 edition of Spider-Man, Peter Parker has the tables turned on him.", + "length": 75 + }, + { + "text": "But why were these top American chemists looking to comics for inspiration?", + "length": 75 + }, + { + "text": "By regularly attending meetings, he became privy to the organization's secrets.", + "length": 79 + }, + { + "text": "Most famously, Martha Stewart donned one while she was under house arrest in 2004.", + "length": 82 + }, + { + "text": "As the storyline progressed, the shows exposed many of the KKK's most guarded secrets.", + "length": 86 + }, + { + "text": "But when he took the 
information to local authorities, they had little interest in using it.", + "length": 92 + }, + { + "text": "Indeed, Krøyer's concept could be traced back to a Donald Duck comic conceived by Carl Barks.", + "length": 94 + }, + { + "text": "Although Kingpin loses in the end (he always does), one New Mexico judge saw beauty in his plan.", + "length": 96 + }, + { + "text": "However, when Krøyer tried to license his invention with the Dutch patent office, he was denied.", + "length": 97 + }, + { + "text": "Struggling to make use of his findings, Kennedy approached the writers of the Superman radio serial.", + "length": 100 + }, + { + "text": "Hammond wrote a technical article about methylene that included a reference to the Donald Duck story.", + "length": 101 + }, + { + "text": "paid tribute to The King in one issue, referring to the singer as \"the greatest modern-day philosopher.", + "length": 103 + }, + { + "text": "For starters, Superman brought down the Ku Klux Klan, and Donald Duck raised ships from the ocean floor.", + "length": 104 + }, + { + "text": "The Klan had become so powerful and intimidating that police were hesitant to build a case against them.", + "length": 104 + }, + { + "text": "By revealing everything from code words to rituals, the program completely stripped the Klan of its mystique.", + "length": 109 + }, + { + "text": "Kids across the country huddled around their sets as the Man of Steel leapt off the page and over the airwaves.", + "length": 111 + }, + { + "text": "But it turns out that The King might have been more interested in their fashion statements than their special powers.", + "length": 117 + }, + { + "text": "The villain, Kingpin, tracks down Spidey using an electronic transmitter that he'd fastened to the superhero's wrist.", + "length": 117 + }, + { + "text": "It seems the web-footed children's hero had deduced the chemical intermediate long before it had been proven to exist.", + "length": 118 + }, + { + "text": "In addition to 
being the most celebrated artist of the Donald Duck comics, Barks was known for his scientific prowess.", + "length": 118 + }, + { + "text": "With the war over and the Nazis no longer a threat, the producers were looking for a new villain for Superman to fight.", + "length": 119 + }, + { + "text": "Authorities in Albuquerque then tested the devices on five offenders, using the gadgets as an alternative to incarceration.", + "length": 123 + }, + { + "text": "In a 16-episode series titled \"Clan of the Fiery Cross,\" the writers pitted the Man of Steel against the men in white hoods.", + "length": 124 + }, + { + "text": "Today, the transmitters are a common sight in courtrooms across the country, usually in the form of electronic ankle bracelets.", + "length": 127 + }, + { + "text": "Like most American kids in the 1940s, Elvis Presley fantasized about growing up to be like his favorite comic book superheroes.", + "length": 127 + }, + { + "text": "Its membership was skyrocketing, and its political influence was increasing, so Kennedy went undercover to infiltrate the group.", + "length": 128 + }, + { + "text": "Although Superman had been fighting crime in print since 1938, the weekly audio episodes fleshed out his storyline even further.", + "length": 128 + }, + { + "text": "(Mental Floss) -- If you think comic book characters do amazing things in comic books, you won't believe what they can do off the page.", + "length": 135 + }, + { + "text": "In 1966, Danish engineer Karl Krøyer developed a method for raising sunken ships off the ocean floor by injecting them with polystyrene foam balls.", + "length": 148 + }, + { + "text": "51 still sits in his preserved childhood bedroom in an apartment in Memphis, and his full comics collection remains intact in the attic at Graceland.", + "length": 149 + }, + { + "text": "When Elvis set out to conquer America with his rock 'n' roll ways, he copied the 'do, thus making it one of the most famous hairstyles of the 20th century.", 
+ "length": 155 + }, + { + "text": "\" A younger version of Captain Marvel, the character sported an unusual hairstyle that featured a curly tuft of hair falling over the side of his forehead.", + "length": 155 + }, + { + "text": "Gaspar had been a lifelong Donald Duck fan, and he'd rediscovered Donald's early reference to methylene while collecting old copies of the classic adventures.", + "length": 158 + }, + { + "text": "It was on the radio that Superman first faced kryptonite, met The Daily Planet reporter Jimmy Olsen, and became associated with \"truth, justice, and the American way.", + "length": 166 + }, + { + "text": "Inspired by the strip, Judge Jack Love turned to computer salesman Michael Goss and asked if he could create a similar device to keep track of crime suspects awaiting trial.", + "length": 173 + }, + { + "text": "\" So, it's no wonder that when a young writer and activist named Stetson Kennedy decided to expose the secrets of the Ku Klux Klan, he looked to a certain superhero for inspiration.", + "length": 181 + }, + { + "text": "Gaspar never disclosed how much his work owed to Duckburg's most famous resident, but then again, how many scientists would confess that they used comic books to bolster their research?", + "length": 185 + }, + { + "text": "The final paragraph read, \"Among experiments which have not, to our knowledge, been carried out as yet is one of a most intriguing nature suggested in the literature of no less than 19 years ago.", + "length": 195 + }, + { + "text": "Captain Marvel also gets credit for the short capes Elvis wore on the back of his jumpsuits, as well as The King's famous TCB logo, which bears a striking resemblance to Marvel's lightning bolt insignia.", + "length": 203 + }, + { + "text": "So in a 1944 story, when Donald got a bump on his head that turned him into a genius, the duck managed to mumble, \"If I mix CH2 [a methylene compound] with NH4 [ammonium] and boil the atoms in osmotic fog, I should get speckled 
nitrogen!", + "length": 237 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4461854547262192 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:35.857937647Z", + "first_section_created": "2025-12-23T09:36:35.859608514Z", + "last_section_published": "2025-12-23T09:36:35.859903926Z", + "all_results_received": "2025-12-23T09:36:35.942130442Z", + "output_generated": "2025-12-23T09:36:35.942349351Z", + "total_processing_time_ms": 84, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 82, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:35.859608514Z", + "publish_time": "2025-12-23T09:36:35.859814922Z", + "first_worker_start": "2025-12-23T09:36:35.860365645Z", + "last_worker_end": "2025-12-23T09:36:35.941308Z", + "total_journey_time_ms": 81, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:35.860382445Z", + "start_time": "2025-12-23T09:36:35.860444048Z", + "end_time": "2025-12-23T09:36:35.860538352Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:35.860953Z", + "start_time": "2025-12-23T09:36:35.861115Z", + "end_time": "2025-12-23T09:36:35.941308Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 80 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:35.860335143Z", + "start_time": "2025-12-23T09:36:35.860409846Z", + "end_time": "2025-12-23T09:36:35.860546352Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:35.860276941Z", + "start_time": "2025-12-23T09:36:35.860365645Z", + "end_time": "2025-12-23T09:36:35.860412546Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:35.859847224Z", 
+ "publish_time": "2025-12-23T09:36:35.859903926Z", + "first_worker_start": "2025-12-23T09:36:35.860486049Z", + "last_worker_end": "2025-12-23T09:36:35.936048Z", + "total_journey_time_ms": 76, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:35.860585953Z", + "start_time": "2025-12-23T09:36:35.860656656Z", + "end_time": "2025-12-23T09:36:35.860695358Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:35.860844Z", + "start_time": "2025-12-23T09:36:35.860989Z", + "end_time": "2025-12-23T09:36:35.936048Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 75 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:35.86050975Z", + "start_time": "2025-12-23T09:36:35.860542452Z", + "end_time": "2025-12-23T09:36:35.860588554Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:35.860438848Z", + "start_time": "2025-12-23T09:36:35.860486049Z", + "end_time": "2025-12-23T09:36:35.86050425Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 155, + "min_processing_ms": 75, + "max_processing_ms": 80, + "avg_processing_ms": 77, + "median_processing_ms": 80, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3313, + "slowest_section_id": 0, + "slowest_section_time_ms": 81 + } +} diff --git a/data/output/007ae95fd859ef30117b9c80232b7b134b370ea3.json b/data/output/007ae95fd859ef30117b9c80232b7b134b370ea3.json new file mode 100644 index 0000000..d4e2a9b --- /dev/null +++ b/data/output/007ae95fd859ef30117b9c80232b7b134b370ea3.json @@ -0,0 +1,250 @@ +{ + "file_name": "007ae95fd859ef30117b9c80232b7b134b370ea3.txt", + "total_words": 461, + "top_n_words": [ + { + "word": "the", + "count": 27 + }, + { + "word": "to", + "count": 26 + }, + { + "word": "that", + "count": 14 + }, + { + "word": "is", + "count": 12 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "in", + "count": 7 + }, + { + "word": "for", + "count": 6 + }, + { + "word": "be", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "Freedom is key to good science.", + "length": 31 + }, + { + "text": "The ethics of inquiry need to adapt.", + "length": 36 + }, + { + "text": "The more we know, the worse for the terrorists.", + "length": 47 + }, + { + "text": "Freedom from terror is also key to good science.", + "length": 48 + }, + { + "text": "Unfortunately, that is no longer the world we live in.", + "length": 54 + }, + { + "text": "Access to some who have clearance to see it should be possible.", + "length": 63 + }, + { + "text": "To go further with potentially catastrophic data is to court trouble.", + "length": 69 + }, + { + "text": "When they conflict, the latter is more important freedom than the former.", + "length": 73 + }, + { + "text": "The opinions 
expressed in this commentary are solely those of Arthur Caplan.", + "length": 76 + }, + { + "text": "We don't have to hide the genetic map for a killer avian flu virus from all eyes.", + "length": 81 + }, + { + "text": "The papers describe how to alter bird-flu virus to be more infectious and potentially nastier.", + "length": 94 + }, + { + "text": "Yes, this is same bird flu virus that, as it moved into pigs, was freaking us all out last year.", + "length": 96 + }, + { + "text": "If there is one thing that scientists hate, it is any policy that restricts research in any way.", + "length": 96 + }, + { + "text": "That is so when it comes to publishing detailed information about dangerous viruses and microbes.", + "length": 97 + }, + { + "text": "Journals and those who write for them ought to do all they can to try and ensure that most important freedom.", + "length": 109 + }, + { + "text": "Once in a long while, however, the price of the truth is simply too high to let scientists disclose their findings publicly.", + "length": 124 + }, + { + "text": "If that is done, then the truth will still be known about whether those making claims of being able to engineer the virus can actually do so.", + "length": 141 + }, + { + "text": "There are those who will say that the only way to fight terror is to adhere to those values that have proven crucial to the advance of science over the decades.", + "length": 160 + }, + { + "text": "Scientists are taught that they need to be bold in asking questions and not let anything deter them from following their thinking wherever it leads, no matter how unpopular that might be.", + "length": 187 + }, + { + "text": "They are also taught the absolute necessity of making their claims public in reputable journals so that other scientists can subject them to the critical skepticism from which the truth ultimately emerges.", + "length": 205 + }, + { + "text": "If you had the detailed map of the viral changes needed, then either a terrorist or 
an amateur \"garage\" biologist operating without the right safeguards would have a very effective critter for killing you and me.", + "length": 212 + }, + { + "text": "Handing the complete formula for making a nasty pandemic bug to any nut with access to the Internet or a subscription to a scientific journal makes no sense in a world that has seen the use of anthrax and sarin as weapons of terror.", + "length": 232 + }, + { + "text": "(CNN) -- On Tuesday, a federal advisory panel, the National Science Advisory Board for Biosecurity, recommended that university scientists who have submitted articles on how to modify a flu virus to two very prestigious journals delete critical information from them before publishing.", + "length": 285 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7047527432441711 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:36.360204405Z", + "first_section_created": "2025-12-23T09:36:36.361509257Z", + "last_section_published": "2025-12-23T09:36:36.361658063Z", + "all_results_received": "2025-12-23T09:36:36.424478097Z", + "output_generated": "2025-12-23T09:36:36.424666005Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:36.361509257Z", + "publish_time": "2025-12-23T09:36:36.361658063Z", + "first_worker_start": "2025-12-23T09:36:36.362200785Z", + "last_worker_end": "2025-12-23T09:36:36.423534Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:36.362198185Z", + "start_time": "2025-12-23T09:36:36.362269988Z", + "end_time": "2025-12-23T09:36:36.36231539Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:36.362383Z", + "start_time": 
"2025-12-23T09:36:36.362519Z", + "end_time": "2025-12-23T09:36:36.423534Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:36.362145283Z", + "start_time": "2025-12-23T09:36:36.362217086Z", + "end_time": "2025-12-23T09:36:36.362288289Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:36.362141983Z", + "start_time": "2025-12-23T09:36:36.362200785Z", + "end_time": "2025-12-23T09:36:36.362229086Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2638, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/007b2811e8b92bd6daf68079ad2a8161036d98a8.json b/data/output/007b2811e8b92bd6daf68079ad2a8161036d98a8.json new file mode 100644 
index 0000000..30db80e --- /dev/null +++ b/data/output/007b2811e8b92bd6daf68079ad2a8161036d98a8.json @@ -0,0 +1,540 @@ +{ + "file_name": "007b2811e8b92bd6daf68079ad2a8161036d98a8.txt", + "total_words": 1540, + "top_n_words": [ + { + "word": "the", + "count": 109 + }, + { + "word": "to", + "count": 40 + }, + { + "word": "and", + "count": 37 + }, + { + "word": "david", + "count": 32 + }, + { + "word": "a", + "count": 30 + }, + { + "word": "in", + "count": 27 + }, + { + "word": "of", + "count": 26 + }, + { + "word": "he", + "count": 25 + }, + { + "word": "ed", + "count": 24 + }, + { + "word": "s", + "count": 24 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "Wrong!", + "length": 6 + }, + { + "text": "victory.", + "length": 8 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "I don’t think so.", + "length": 19 + }, + { + "text": "Richard Pendlebury .", + "length": 20 + }, + { + "text": "19:31 EST, 27 March 2013 .", + "length": 26 + }, + { + "text": "Not Ed and David Miliband.", + "length": 26 + }, + { + "text": "02:51 EST, 28 March 2013 .", + "length": 26 + }, + { + "text": "’ And so the future lies westwards.", + "length": 37 + }, + { + "text": "’ But none that come quickly to mind.", + "length": 39 + }, + { + "text": "I didn’t want to become a distraction.", + "length": 40 + }, + { + "text": "Stranger things have happened in politics.", + "length": 42 + }, + { + "text": "One in which he might still wield real power.", + "length": 45 + }, + { + "text": "Like Banquo’s ghost at the feast, it seemed.", + "length": 46 + }, + { + "text": "‘It would make Labour look like North Korea.", + "length": 46 + }, + { + "text": "he needed to lead the party without distraction’.", + "length": 51 + }, + { + "text": "Then David tweeted: ‘Great day for Ed and Justine.", + "length": 52 + }, + { + "text": "He snatched the job David thought was 
his birthright.", + "length": 53 + }, + { + "text": "I never thought he would stay until the next election.", + "length": 54 + }, + { + "text": "A former Foreign Secretary as back bench MP for South Shields?", + "length": 62 + }, + { + "text": "Some have called it the final curtain on the New Labour project.", + "length": 64 + }, + { + "text": "They look very happy – congratulations from all the Milibands.", + "length": 64 + }, + { + "text": "Younger brother Ed was the deceptively geeky assassin with the bow.", + "length": 67 + }, + { + "text": "Unlike Ed, he would not throw down a direct challenge to a brother.", + "length": 67 + }, + { + "text": "‘She and David had a real chemistry when he was Foreign Secretary.", + "length": 68 + }, + { + "text": "Leaving Westminster wasn’t enough – only an ocean apart would do.", + "length": 69 + }, + { + "text": "I love him dearly but win or lose we will remain the best of friends’.", + "length": 72 + }, + { + "text": "And you have to remember that David Miliband is older than David Cameron.", + "length": 73 + }, + { + "text": "She is an American and their two children were both adopted in the States.", + "length": 74 + }, + { + "text": "’ Another added: ‘He knows in his heart of hearts he is finished here.", + "length": 74 + }, + { + "text": "‘She took it personally, and has not come to terms with it even slightly.", + "length": 75 + }, + { + "text": "Why should David go back into the Shadow Cabinet and improve things for Ed?", + "length": 75 + }, + { + "text": "As one ally said yesterday: ‘David does not want to stand still any longer.", + "length": 77 + }, + { + "text": "In his resignation statement, he said he had done it to ‘give Ed the space ...", + "length": 80 + }, + { + "text": "She admitted she had a crush on him and could end up giving him an important job.", + "length": 81 + }, + { + "text": "A black and yellow arrow is the vivid symbol of the International Rescue Committee.", + "length": 83 + }, + { + "text": "’ 
One Westminster source used rather blunter language: ‘He will never come back.", + "length": 84 + }, + { + "text": "But he would step in if other forces conspired to ensure that Ed lost the leadership.", + "length": 85 + }, + { + "text": "‘It would be too weird for one Miliband to take over from another,’ says the source.", + "length": 88 + }, + { + "text": "Brothers at war: The first autobiography of Ed Miliband made for uncomfortable reading .", + "length": 88 + }, + { + "text": "You will see it on flags flying over refugee camps from the Syrian borders to the Congo.", + "length": 88 + }, + { + "text": "I didn’t want the soap opera to take over the real substance of what needs to be done.", + "length": 88 + }, + { + "text": "‘What price David Miliband in a senior role in the White House and Ed Miliband in No 10?", + "length": 90 + }, + { + "text": "A new chapter in the David Miliband story begins, but the sibling rivalry continues unabated.", + "length": 93 + }, + { + "text": "There was a public coming together for Ed’s marriage to lawyer Justine Thornton in May 2011.", + "length": 94 + }, + { + "text": "He went on: ‘I want it to be the vision Ed Miliband has versus the vision David Cameron has.", + "length": 94 + }, + { + "text": "’ Tellingly, though, David and Louise did not attend the reception at Ed’s north London home.", + "length": 97 + }, + { + "text": "And the brothers, who once spoke several times a week, largely communicated through their offices.", + "length": 98 + }, + { + "text": "‘Louise feels that Ed’s wife Justine should have done the right thing and stopped her husband.", + "length": 98 + }, + { + "text": "‘Anyway, he is sick of the whole soap opera, psycho-drama that a minor role in British politics brings.", + "length": 105 + }, + { + "text": "’ Of course Miliband senior’s departure represents more than simply the end of his own ambitions here.", + "length": 106 + }, + { + "text": "Discord: David said he had quit politics to give his brother 
Ed, right, 'space' to lead the Labour party .", + "length": 106 + }, + { + "text": "Home ties: David Miliband's wife Louise Shackleton is American and their two children were adopted in the States .", + "length": 114 + }, + { + "text": "Like Blair before him, David Miliband is using the contacts he made in his former life to forge a lucrative future.", + "length": 115 + }, + { + "text": "So could David still have enough of a passion for politics to return from America and throw down the gauntlet once more?", + "length": 120 + }, + { + "text": "‘Heartbroken’ both by the dashing of his ambitions and the person behind it, David stepped down from front-rank politics.", + "length": 125 + }, + { + "text": "But the most telling factor is her continued bitterness at the way in which her husband was ‘betrayed’ by his little brother.", + "length": 129 + }, + { + "text": "But if David was devastated at this political assassination, he discovered that having more time on his hands could be lucrative.", + "length": 129 + }, + { + "text": "Today, with the Coalition in some disarray and Labour with a double-figure lead in the polls, that eventuality seems more unlikely.", + "length": 131 + }, + { + "text": "Written by two Left-wing writers, it was an uncompromising picture of the schism which made uncomfortable reading for all concerned.", + "length": 132 + }, + { + "text": "According to several Westminster insiders, David Miliband’s decision was vigorously encouraged by his musician wife Louise Shackelton.", + "length": 136 + }, + { + "text": "While David told Jeremy Paxman he no longer dreamed of being Prime Minister, his supporters privately suggested that he was biding his time.", + "length": 140 + }, + { + "text": "For the 47-year-old former Foreign Secretary and one-time No 10 hopeful, a new life along the Hudson rather than the Thames is about escaping Ed.", + "length": 145 + }, + { + "text": "The lasting damage became clear with the publication of the first biography of 
the new leader, Ed: The Milibands And The Making Of A Labour Leader.", + "length": 147 + }, + { + "text": "And even as his brother struggled to impose a credible leadership, there were signs that the David Miliband camp was still manoeuvring for position.", + "length": 148 + }, + { + "text": "‘The relationship between the two brothers has never really recovered, but the driving force is David’s wife Louise,’ an associate of David’s said.", + "length": 155 + }, + { + "text": "As one insider said: ‘The New Labour corpse was just about twitching with David Miliband still around on the back benches, but that’s the end of it now.", + "length": 156 + }, + { + "text": "’ And so the discord continues, giving the lie to Ed Miliband’s statement when he stood for the leadership that ‘David is my best friend in the world.", + "length": 156 + }, + { + "text": "The book also suggested that the brothers’ mother, Marion, was ‘devastated’ by the rift and wished she had persuaded her younger son not to have stood.", + "length": 157 + }, + { + "text": "’ What’s left, of course, is a Labour Party ever more in hock to the unions, and with policies that seem designed to drag it away from the centre ground.", + "length": 157 + }, + { + "text": "Louise wants to get right out of the political “Primrose Hill mafia” set from which the brothers emerged, and New York is also a good base for her own orchestral work.", + "length": 171 + }, + { + "text": "He embarked on the international lecture circuit, and took on ‘advisory’ roles, earning £70,000 for three-and-a-half days work with a Californian eco-technology company.", + "length": 174 + }, + { + "text": "There were also revelations about the smears – Ed was dubbed a ‘Bennite’, ‘Red Ed’ and ‘Forrest Gump’ – supporters of both brothers waged in the run-up to the ballot.", + "length": 182 + }, + { + "text": "It had done little to soothe his feelings to hear the victorious Ed declare that Britain had not ‘heard the last’ of David, and that he 
would be ‘around in one way or another’.", + "length": 184 + }, + { + "text": "‘Everyone thinks the Clintons sorted his new job for him, and you would think David will be heavily involved in Hillary’s presidential election campaign in 2016,’ said another Westminster source.", + "length": 201 + }, + { + "text": "'Took it personally': David's wife Louise Shackelton, left, believed Ed's wife Justine Thornton, right, should have done the right thing and put a stop to his efforts to become leader of the Labour party .", + "length": 205 + }, + { + "text": "The sight of the two Primrose Hill-raised Labour apparatchiks engaged in political fratricide was astonishing, and in the subsequent two-and-a-half years, no amount of fine public words or behind-the-scenes finessing has healed that wound.", + "length": 239 + }, + { + "text": "Leaks to the Guardian, which had supported David’s leadership bid, were a clear sign – such as the publication of emails which showed that the two Eds, Miliband and Balls, had plotted to oust Blair after Labour’s 2005 election victory.", + "length": 241 + }, + { + "text": "Yet it was the poison-tipped missile that thudded between David Miliband’s shoulder blades at the Labour leadership election which most informs his decision to abandon British politics for a place among New York’s glamorous charity elite.", + "length": 242 + }, + { + "text": "’ So ends serious recent speculation that the fraternal schism could be healed for the good of the party, with the more experienced and charismatic Miliband brother returning to the Shadow Cabinet to turn a strong position into a winning one at the next general election.", + "length": 273 + }, + { + "text": "In an interview with the BBC yesterday, he hinted at this when he said it was ‘hard for me to accept that I can best help the Labour Party by giving not just the space between the front bench and the back bench to Ed, but the space between the front bench and 3,000 miles away’.", + "length": 282 + }, + 
{ + "text": "Forget Ed’s almost tear-stained eulogies yesterday at the news his brother was  leaving British politics; forget mentor Tony Blair’s hope that ‘this is time out, not time over’; forget the $450,000 salary the Twittersphere was predicting for the new IRC boss; set aside even David Miliband’s own part-explanation that he was repaying a debt to an organisation formed in the 1930s to help people like his parents flee Nazi persecution.", + "length": 445 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4962311238050461 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:36.861942541Z", + "first_section_created": "2025-12-23T09:36:36.863169491Z", + "last_section_published": "2025-12-23T09:36:36.86365241Z", + "all_results_received": "2025-12-23T09:36:36.95165146Z", + "output_generated": "2025-12-23T09:36:36.951890369Z", + "total_processing_time_ms": 89, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 87, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:36.863169491Z", + "publish_time": "2025-12-23T09:36:36.8634089Z", + "first_worker_start": "2025-12-23T09:36:36.864081828Z", + "last_worker_end": "2025-12-23T09:36:36.948131Z", + "total_journey_time_ms": 84, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:36.864194632Z", + "start_time": "2025-12-23T09:36:36.864250934Z", + "end_time": "2025-12-23T09:36:36.864361839Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:36.864317Z", + "start_time": "2025-12-23T09:36:36.86447Z", + "end_time": "2025-12-23T09:36:36.948131Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 83 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:36.864190832Z", + "start_time": "2025-12-23T09:36:36.864306937Z", + 
"end_time": "2025-12-23T09:36:36.864415541Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:36.863992624Z", + "start_time": "2025-12-23T09:36:36.864081828Z", + "end_time": "2025-12-23T09:36:36.864122929Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:36.863439102Z", + "publish_time": "2025-12-23T09:36:36.86365241Z", + "first_worker_start": "2025-12-23T09:36:36.864160231Z", + "last_worker_end": "2025-12-23T09:36:36.95097Z", + "total_journey_time_ms": 87, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:36.864193332Z", + "start_time": "2025-12-23T09:36:36.864253334Z", + "end_time": "2025-12-23T09:36:36.864319437Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:36.864318Z", + "start_time": "2025-12-23T09:36:36.864465Z", + "end_time": "2025-12-23T09:36:36.95097Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 86 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:36.864188132Z", + "start_time": "2025-12-23T09:36:36.864249634Z", + "end_time": "2025-12-23T09:36:36.864336238Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:36.864099828Z", + "start_time": "2025-12-23T09:36:36.864160231Z", + "end_time": "2025-12-23T09:36:36.864196332Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": 
"sentiment", + "total_sections": 2, + "total_processing_ms": 169, + "min_processing_ms": 83, + "max_processing_ms": 86, + "avg_processing_ms": 84, + "median_processing_ms": 86, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 4463, + "slowest_section_id": 1, + "slowest_section_time_ms": 87 + } +} diff --git a/data/output/007b8e43c33fa56ee62ae0f3ec0570c8988129ac.json b/data/output/007b8e43c33fa56ee62ae0f3ec0570c8988129ac.json new file mode 100644 index 0000000..43d1de5 --- /dev/null +++ b/data/output/007b8e43c33fa56ee62ae0f3ec0570c8988129ac.json @@ -0,0 +1,254 @@ +{ + "file_name": "007b8e43c33fa56ee62ae0f3ec0570c8988129ac.txt", + "total_words": 362, + "top_n_words": [ + { + "word": "a", + "count": 15 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "the", + "count": 14 + }, + { + "word": "of", + "count": 12 + }, + { + "word": "s", + "count": 11 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "i", + "count": 8 + }, + { + "word": "andrew", + "count": 7 + }, + { + "word": "dr", + "count": 7 + }, + { + "word": "he", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "How beautiful.", + "length": 14 + }, + { + "text": "Ted Thornhill .", + "length": 15 + }, + { + "text": "‘It's special.", + "length": 16 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "He said: ‘It opens a big connection between us.", + 
"length": 49 + }, + { + "text": "I enjoy an opportunity to celebrate with the family.", + "length": 52 + }, + { + "text": "The bond is instantaneous when you connect this way.", + "length": 52 + }, + { + "text": "The songs are his way of celebrating with the family.", + "length": 53 + }, + { + "text": "Crooner: Dr Carey Andrew-Jaja sings to a newborn baby .", + "length": 55 + }, + { + "text": "User Alison Nash wrote: ‘This brought tears to my eyes.", + "length": 57 + }, + { + "text": "Each of them is an individual and I've delivered thousands of babies.", + "length": 69 + }, + { + "text": "The birth of a newborn baby is always very special occasion, but at one U.", + "length": 74 + }, + { + "text": "On song: Dr Andrew-Jaja sings to every baby that he helps to deliver at Magee-Womens Hospital .", + "length": 95 + }, + { + "text": "He said: ‘He passed the baton onto me and I've sung to my babies ever since then and I do it every time.", + "length": 106 + }, + { + "text": "’ Dr Andrew-Jaja also believes that it’s a way of forgetting about the world’s problems for a few minutes.", + "length": 112 + }, + { + "text": "’ And Ron Baker commented: ‘That was incredible and should be required of all doctors that deliver babies...", + "length": 112 + }, + { + "text": "’ Soothing: Dr Andrew-Jaja believes the singing is a way of forgetting about the world’s problems for a moment .", + "length": 116 + }, + { + "text": "S hospital it’s even more magical – thanks to a doctor who sings to each and every child that he helps to deliver.", + "length": 118 + }, + { + "text": "When I'm singing to those babies I think I'm singing to a future important person - that's the credit I give to all of them.", + "length": 124 + }, + { + "text": "Dr Andrew-Jaja’s crooning began when a more senior physician who enjoyed singing to babies approached retirement and encouraged him to do the same.", + "length": 149 + }, + { + "text": "’ Dr Andrew-Jaja’s serenading was met with a chorus of approval by 
internet users, with a YouTube video of him singing garnering almost one million views.", + "length": 158 + }, + { + "text": "He said: ‘It's a beautiful world we live in and forget about all the crisis going on everywhere for a moment, when you see that miracle of life in front of you.", + "length": 162 + }, + { + "text": "Dr Carey Andrew-Jaja, who works at the Magee-Womens Hospital in Pittsburgh, particularly enjoys singing Happy Birthday or What A Wonderful World to the tiny humans he’s delivered.", + "length": 181 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.6023634672164917 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:37.36447391Z", + "first_section_created": "2025-12-23T09:36:37.364820524Z", + "last_section_published": "2025-12-23T09:36:37.364996031Z", + "all_results_received": "2025-12-23T09:36:37.433166681Z", + "output_generated": "2025-12-23T09:36:37.433311286Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:37.364820524Z", + "publish_time": "2025-12-23T09:36:37.364996031Z", + "first_worker_start": "2025-12-23T09:36:37.365494251Z", + "last_worker_end": "2025-12-23T09:36:37.432222Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:37.365451549Z", + "start_time": "2025-12-23T09:36:37.365511552Z", + "end_time": "2025-12-23T09:36:37.365550553Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:37.365649Z", + "start_time": "2025-12-23T09:36:37.3658Z", + "end_time": "2025-12-23T09:36:37.432222Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:37.365453149Z", + 
"start_time": "2025-12-23T09:36:37.365515252Z", + "end_time": "2025-12-23T09:36:37.365573654Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:37.365427148Z", + "start_time": "2025-12-23T09:36:37.365494251Z", + "end_time": "2025-12-23T09:36:37.365516852Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1987, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/007b8fb3014d8d7b4266cf85ba1f368cfba44801.json b/data/output/007b8fb3014d8d7b4266cf85ba1f368cfba44801.json new file mode 100644 index 0000000..879ef4f --- /dev/null +++ b/data/output/007b8fb3014d8d7b4266cf85ba1f368cfba44801.json @@ -0,0 +1,370 @@ +{ + "file_name": "007b8fb3014d8d7b4266cf85ba1f368cfba44801.txt", + "total_words": 899, + "top_n_words": [ + 
{ + "word": "the", + "count": 43 + }, + { + "word": "a", + "count": 22 + }, + { + "word": "to", + "count": 22 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "for", + "count": 15 + }, + { + "word": "new", + "count": 15 + }, + { + "word": "program", + "count": 13 + }, + { + "word": "it", + "count": 11 + }, + { + "word": "of", + "count": 11 + }, + { + "word": "000", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "com.", + "length": 4 + }, + { + "text": "iReport.", + "length": 8 + }, + { + "text": "iReport.", + "length": 8 + }, + { + "text": "iReport.", + "length": 8 + }, + { + "text": "FuelEconomy.", + "length": 12 + }, + { + "text": "com: Radtke's purchase .", + "length": 24 + }, + { + "text": "com, was happy to oblige.", + "length": 25 + }, + { + "text": "gov, to learn the program's rules.", + "length": 34 + }, + { + "text": "The CARS program isn't for everyone.", + "length": 36 + }, + { + "text": "Consumers can go to a Web site, cars.", + "length": 37 + }, + { + "text": "The old vehicles are crushed or shredded.", + "length": 41 + }, + { + "text": "com: Read Sable's account of the purchase .", + "length": 43 + }, + { + "text": "com: How Callahan got $9,000 for her truck .", + "length": 44 + }, + { + "text": "gov: See if your vehicle qualifies for CARS .", + "length": 45 + }, + { + "text": "The credit won't go toward used-car purchases.", + "length": 46 + }, + { + "text": "The government put Sable's old Jeep at 15 mpg.", + "length": 46 + }, + { + "text": "After the credit, they paid just under $26,000.", + "length": 47 + }, + { + "text": "New large vans and pickups must get at least 15 mpg.", + "length": 52 + }, + { + "text": "\"I'll never get $9,000 for this old vehicle [any other way].", + "length": 60 + }, + { + "text": "He'll drive the PT Cruiser and let the son drive his Nissan.", + "length": 60 + }, + { + "text": "Fuel economy thresholds for new vehicles vary according to type.", + "length": 64 + }, + { + "text": "Watch CNN's Gerri 
Willis explain the \"cash for clunkers\" program » .", + "length": 69 + }, + { + "text": "Caroline Radtke, a 31-year-old who wrote about her purchase on iReport.", + "length": 71 + }, + { + "text": "The $9,000 in savings knocked the price to $8,900 before taxes and fees.", + "length": 72 + }, + { + "text": "And Chrysler, eager to sell vehicles, threw in its own $4,500 incentive.", + "length": 72 + }, + { + "text": "She'll be paying about $10,000 for her new vehicle after taxes and fees.", + "length": 72 + }, + { + "text": "And, like Sable, she also received a separate $4,500 credit from Chrysler.", + "length": 74 + }, + { + "text": "New cars must have a combined city/highway fuel economy of at least 22 mpg.", + "length": 75 + }, + { + "text": "New SUVs and small or medium pickup trucks or vans must get at least 18 mpg.", + "length": 76 + }, + { + "text": "His new PT Cruiser, which the program classifies as an SUV, gets a combined 21 mpg.", + "length": 83 + }, + { + "text": "Trade-ins must be less than 25 years old, and their titles must be free of any liens.", + "length": 85 + }, + { + "text": "Part of the program's intent is to get vehicles with low fuel efficiency off the road.", + "length": 86 + }, + { + "text": "But lately it started having shifting problems, and it was occasionally slipping going uphill.", + "length": 94 + }, + { + "text": "The gas-guzzling 1993 Jeep Grand Cherokee his college-student son drives went bad last weekend.", + "length": 95 + }, + { + "text": "Ordinarily Sable would have fixed it, even though the vehicle was worth perhaps $2,000 at best.", + "length": 95 + }, + { + "text": "Like Sable, Callahan, 39, got $4,500 this week to turn in the old vehicle and buy a new PT Cruiser.", + "length": 99 + }, + { + "text": "If they had sold the Trooper themselves, they might have gotten $3,000 if they were lucky, she said.", + "length": 100 + }, + { + "text": "\"I'd have been foolish not to take it,\" said Andrew Sable, who got $9,000 for his 1993 
Jeep Grand Cherokee.", + "length": 107 + }, + { + "text": "She and her husband already had a newer vehicle, but she used the truck to go to work and for other in-town purposes.", + "length": 117 + }, + { + "text": "Unless it is renewed, the program will end November 1 or when funds allotted by Congress run out, whichever happens first.", + "length": 122 + }, + { + "text": "\"After driving it for eight and a half years, I wanted something more productive financially and more friendly to the Earth.", + "length": 124 + }, + { + "text": "Also, people looking to get rid of their under-18-mpg vehicle might find they can get about the same or more than a CARS credit by selling it.", + "length": 142 + }, + { + "text": "(CNN) -- Andrew Sable wasn't in the market for new wheels, but he says the federal \"cash for clunkers\" program helped him get an offer he couldn't refuse.", + "length": 154 + }, + { + "text": "\" The couple would have bought a new car without CARS, but the credit probably allowed them to get a nicer car than they otherwise would have, Radtke said.", + "length": 155 + }, + { + "text": "I'd have been foolish not to take it,\" the 43-year-old Sable, an insurance underwriter living in North Bellmore, New York, told CNN after filing a report with iReport.", + "length": 167 + }, + { + "text": "The $9,000 she saved with the credits from CARS and Chrysler isn't too shabby, considering she figures her old pickup was nearly worthless because it had so many miles.", + "length": 168 + }, + { + "text": "But, aware of the program that started this month, Sable took a $4,500 federal credit this week to trade in the Jeep and buy a new, more fuel-efficient Chrysler PT Cruiser.", + "length": 172 + }, + { + "text": "\"Without the incentives, I probably wouldn't have purchased a brand new vehicle,\" Callahan, who runs a science outreach program at the University of Utah, told CNN after filing her iReport.", + "length": 189 + }, + { + "text": "The exact credit offered through 
the program --- officially called the Consumer Assistance to Recycle and Save Act of 2009, or CARS -- depends on how many more miles per gallon the new vehicle gets.", + "length": 198 + }, + { + "text": "But the program worked just fine for iReporter Julie Callahan, a Salt Lake City, Utah, woman who was looking to replace her 1990 Chevy C1500 pickup truck, which had more than 350,000 miles and is rated at 15 mpg.", + "length": 212 + }, + { + "text": "Radtke and her husband this month got a $4,500 CARS credit for trading in their 2000 Isuzu Trooper (15 mpg) to buy a new Volkswagen Jetta SportWagen, a diesel-powered car that the program lists as getting 33 mpg.", + "length": 212 + }, + { + "text": "Under the $1 billion program, people will be given credits of $3,500 to $4,500 to replace gas guzzlers -- generally vehicles with a combined city/highway fuel economy of 18 miles per gallon or less -- with new vehicles that are more fuel efficient.", + "length": 248 + }, + { + "text": "\"What was going out of my [old] vehicle was bad for the planet, and you're putting so much financially into the stupid thing to fill it up because it runs out so fast,\" Radtke, a freelance graphic designer living in San Antonio, Texas, told CNN after filing her iReport.", + "length": 270 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5739532709121704 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:37.865746928Z", + "first_section_created": "2025-12-23T09:36:37.867745108Z", + "last_section_published": "2025-12-23T09:36:37.868007419Z", + "all_results_received": "2025-12-23T09:36:37.93373607Z", + "output_generated": "2025-12-23T09:36:37.933922178Z", + "total_processing_time_ms": 68, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:37.867745108Z", + "publish_time": "2025-12-23T09:36:37.868007419Z", 
+ "first_worker_start": "2025-12-23T09:36:37.868494239Z", + "last_worker_end": "2025-12-23T09:36:37.932826Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:37.868465738Z", + "start_time": "2025-12-23T09:36:37.868539841Z", + "end_time": "2025-12-23T09:36:37.868640445Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:37.868718Z", + "start_time": "2025-12-23T09:36:37.868858Z", + "end_time": "2025-12-23T09:36:37.932826Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:37.868429536Z", + "start_time": "2025-12-23T09:36:37.868508539Z", + "end_time": "2025-12-23T09:36:37.868646245Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:37.868416236Z", + "start_time": "2025-12-23T09:36:37.868494239Z", + "end_time": "2025-12-23T09:36:37.868629844Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + 
"wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4864, + "slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/007c0e1dc4f7e5355e4738f4434a960cc4cba499.json b/data/output/007c0e1dc4f7e5355e4738f4434a960cc4cba499.json new file mode 100644 index 0000000..f02afde --- /dev/null +++ b/data/output/007c0e1dc4f7e5355e4738f4434a960cc4cba499.json @@ -0,0 +1,416 @@ +{ + "file_name": "007c0e1dc4f7e5355e4738f4434a960cc4cba499.txt", + "total_words": 975, + "top_n_words": [ + { + "word": "the", + "count": 73 + }, + { + "word": "to", + "count": 38 + }, + { + "word": "a", + "count": 37 + }, + { + "word": "of", + "count": 23 + }, + { + "word": "laser", + "count": 22 + }, + { + "word": "and", + "count": 20 + }, + { + "word": "weapon", + "count": 16 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "for", + "count": 11 + }, + { + "word": "ponce", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "' The U.", + "length": 8 + }, + { + "text": "Rear Adm.", + "length": 9 + }, + { + "text": "Matthew L.", + "length": 10 + }, + { + "text": "Klunder said.", + "length": 13 + }, + { + "text": "Perfect shot!", + "length": 13 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "forces access to certain areas.", + "length": 31 + }, + { + "text": "It integrates six commercial 5.", + "length": 31 + }, + { + "text": "Matthew Klunder said when the tests began.", + "length": 42 + }, + { + "text": "In 2012, LaWS downed several unmanned aircraft in tests.", + "length": 56 + }, + { + "text": "'We were calling it a Hubble telescope on the water,' 
Adm.", + "length": 58 + }, + { + "text": "The range of surveillance capabilities is a closely guarded secret.", + "length": 67 + }, + { + "text": "The USS Ponce, where the radical weapon is being tested by the US Navy .", + "length": 72 + }, + { + "text": "'This is a revolutionary capability,' said Chief of Naval Research Rear Adm.", + "length": 76 + }, + { + "text": "'We have the authorities right now to use it in self-defense,' Klunder said.", + "length": 76 + }, + { + "text": "The weapons was also shown downing a drone with a single blast from its laser .", + "length": 79 + }, + { + "text": "4 kW fibre lasers with a beam combiner originated by the Naval Research Laboratory.", + "length": 83 + }, + { + "text": "4 kW fibre lasers with a beam combiner originated by the Naval Research Laboratory.", + "length": 83 + }, + { + "text": "The moment the laser weapon blows up an unmanned boat during tests in the Persian Gulf .", + "length": 88 + }, + { + "text": "Sailors were able to identify approaching vessels at ranges 'they have never dreamed of'.", + "length": 89 + }, + { + "text": "Experts say they also found a surprising second use for the system - as a surveillance tool.", + "length": 92 + }, + { + "text": "Just one controller is need, who uses a game controller to direct and fire the laser weapon .", + "length": 93 + }, + { + "text": "The laser weapon aboard the USS Ponce, where is was able to shoot boats and drone in exercises .", + "length": 96 + }, + { + "text": "In a 2011 demonstration, a laser was used to defeat multiple small boat threats from a destroyer.", + "length": 97 + }, + { + "text": "' The Navy already has demonstrated the effectiveness of lasers in a variety of maritime settings.", + "length": 98 + }, + { + "text": "Central Command has given permission for the commander of the ship to defend itself with the weapon.", + "length": 100 + }, + { + "text": "'Our nation's adversaries are pursuing a variety of ways to try and restrict our freedom to 
operate,' Klunder said.", + "length": 115 + }, + { + "text": "To show its accuracy, researchers put a test missile on an unmanned craft - and the laser was able to hit it perfectly .", + "length": 120 + }, + { + "text": "The deployment on Ponce will prove crucial as the Navy continues its push to provide laser weapons to the fleet at large.", + "length": 121 + }, + { + "text": "'If someone was coming to harm the USS Ponce, we could use this laser system on that threat and we would intend to do so.", + "length": 121 + }, + { + "text": "A surface warfare weapons officer aboard USS Ponce who can operate all functions of the laser-and if commanded, fire the laser weapon.", + "length": 134 + }, + { + "text": "A surface warfare weapons officer aboard USS Ponce who can operate all functions of the laser-and if commanded, fire the laser weapon.", + "length": 134 + }, + { + "text": "that will keep our Sailors and Marines safe and well-defended for years to come,' said Peter Morrison, ONR program manager for SSL-TM.", + "length": 134 + }, + { + "text": "The prototype, an improved version of the Laser Weapon System (LaWS), was installed on USS Ponce for at-sea testing in the Persian Gulf.", + "length": 136 + }, + { + "text": "Navy is has declared an experimental laser weapon on its Afloat Forward Staging Base (AFSB) in the Persian Gulf an operational asset and U.", + "length": 139 + }, + { + "text": "The prototype 30 kW-class solid-state laser (SSL) weapon system was developed under the leadership of the Naval Sea Systems Command (NAVSEA).", + "length": 141 + }, + { + "text": "The Navy has revealed a radical new laser weapon it says can shoot down missiles, boats and even drones from a warship is already is active service.", + "length": 149 + }, + { + "text": "'Spending about $1 per shot of a directed-energy source that never runs out gives us an alternative to firing costly munitions at inexpensive threats.", + "length": 150 + }, + { + "text": "'We believe the deployment on 
Ponce and SSL-TM will pave the way for a future acquisition program of record so we can provide this capability across the fleet.", + "length": 159 + }, + { + "text": "High-energy lasers offer an affordable and safe way to target these threats at the speed of light with extreme precision and an unlimited magazine, experts say.", + "length": 160 + }, + { + "text": "The Navy will decide next year which, if any, of the three industry prototypes are suitable to move forward and begin initial ship installation for further testing.", + "length": 164 + }, + { + "text": "The laser was accurate enough to blow up a test missile being carried on an unmanned platform during tests of the new weapon aboard the USS Ponce in the Persian Gulf.", + "length": 167 + }, + { + "text": "Klunder, chief of naval research, said that during the test sailors began using the system in ways its developers didn't fully anticipate, such as for long-range surveillance.", + "length": 175 + }, + { + "text": "The  prototype 30 kW-class solid-state laser (SSL) weapon system developed under the leadership of the Naval Sea Systems Command (NAVSEA), the LaWS integrates six commercial 5.", + "length": 177 + }, + { + "text": "Using a video game-like controller, that sailor will be able to manage the laser's power to accomplish a range of effects against a threat, from disabling to complete destruction.", + "length": 179 + }, + { + "text": "Using a video game-like controller, that sailor will be able to manage the laser's power to accomplish a range of effects against a threat, from disabling to complete destruction.", + "length": 179 + }, + { + "text": "'It's absolutely critical that we get this out to sea with our Sailors for these trials, because this very affordable technology is going to change the way we fight and save lives.", + "length": 180 + }, + { + "text": "The prototype Laser Weapon System was being tested on the USS Ponce in the Persian Gulf, and was so successfully commanders gave the weapon 
the go-ahead to use it to defend the ship.", + "length": 184 + }, + { + "text": "Using a video game-like controller (pictured), that sailor will be able to manage the laser's power to accomplish a range of effects against a threat, from disabling to complete destruction.", + "length": 190 + }, + { + "text": "' Navy leaders have made directed-energy weapons a top priority to counter what they call asymmetric threats, including unmanned and light aircraft and small attack boats that could be used to deny U.", + "length": 200 + }, + { + "text": "' As a result of the test, Navy officials said they planned to deploy the weapon into the Middle East for a year aboard the Ponce, allowing sailors to use the system to track potential threats and defend the ship.", + "length": 213 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.41764652729034424 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:38.368511706Z", + "first_section_created": "2025-12-23T09:36:38.370626191Z", + "last_section_published": "2025-12-23T09:36:38.370988506Z", + "all_results_received": "2025-12-23T09:36:38.461694964Z", + "output_generated": "2025-12-23T09:36:38.461925874Z", + "total_processing_time_ms": 93, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 90, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:38.370626191Z", + "publish_time": "2025-12-23T09:36:38.3708426Z", + "first_worker_start": "2025-12-23T09:36:38.371394122Z", + "last_worker_end": "2025-12-23T09:36:38.460709Z", + "total_journey_time_ms": 90, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:38.371367221Z", + "start_time": "2025-12-23T09:36:38.371445324Z", + "end_time": "2025-12-23T09:36:38.371549128Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": 
"2025-12-23T09:36:38.371729Z", + "start_time": "2025-12-23T09:36:38.371872Z", + "end_time": "2025-12-23T09:36:38.460709Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 88 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:38.371325119Z", + "start_time": "2025-12-23T09:36:38.371398622Z", + "end_time": "2025-12-23T09:36:38.371499926Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:38.371320219Z", + "start_time": "2025-12-23T09:36:38.371394122Z", + "end_time": "2025-12-23T09:36:38.371459625Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:38.370910703Z", + "publish_time": "2025-12-23T09:36:38.370988506Z", + "first_worker_start": "2025-12-23T09:36:38.371423323Z", + "last_worker_end": "2025-12-23T09:36:38.450961Z", + "total_journey_time_ms": 80, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:38.371498526Z", + "start_time": "2025-12-23T09:36:38.371541928Z", + "end_time": "2025-12-23T09:36:38.371559329Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:38.371941Z", + "start_time": "2025-12-23T09:36:38.372067Z", + "end_time": "2025-12-23T09:36:38.450961Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 78 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:38.371384022Z", + "start_time": "2025-12-23T09:36:38.371423323Z", + "end_time": "2025-12-23T09:36:38.371456325Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:38.371426724Z", + "start_time": "2025-12-23T09:36:38.371471125Z", + "end_time": "2025-12-23T09:36:38.371478226Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + 
], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 166, + "min_processing_ms": 78, + "max_processing_ms": 88, + "avg_processing_ms": 83, + "median_processing_ms": 88, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2808, + "slowest_section_id": 0, + "slowest_section_time_ms": 90 + } +} diff --git a/data/output/007c4a5dfd21e5339f96c9c244c6c7d91168a4be.json b/data/output/007c4a5dfd21e5339f96c9c244c6c7d91168a4be.json new file mode 100644 index 0000000..f0d8141 --- /dev/null +++ b/data/output/007c4a5dfd21e5339f96c9c244c6c7d91168a4be.json @@ -0,0 +1,246 @@ +{ + "file_name": "007c4a5dfd21e5339f96c9c244c6c7d91168a4be.txt", + "total_words": 544, + "top_n_words": [ + { + "word": "the", + "count": 23 + }, + { + "word": "to", + "count": 20 + }, + { + "word": "sandusky", + "count": 14 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "his", + "count": 12 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "that", + "count": 11 + }, + { + "word": "with", + "count": 11 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "s", + "count": 10 + } + ], + 
"sorted_sentences": [ + { + "text": "Both of them have pleaded not guilty.", + "length": 37 + }, + { + "text": "He has pleaded not guilty to the charges.", + "length": 41 + }, + { + "text": "Cleland has said he is aiming for a May 14 trial for Sandusky.", + "length": 62 + }, + { + "text": "CNN's Jason Carroll and Mark Norman contributed to this report.", + "length": 63 + }, + { + "text": "The mother of those children has strongly objected to them having contact with Sandusky.", + "length": 88 + }, + { + "text": "Sandusky will be allowed to visit with eight of his grandchildren under parental supervision, Cleland ruled.", + "length": 108 + }, + { + "text": "Joe Amendola, Sandusky's attorney, said the former assistant coach and his family are happy about the ruling.", + "length": 109 + }, + { + "text": "Sandusky has been under house arrest since December, when he was charged with sexually abusing young boys over a 15-year period.", + "length": 128 + }, + { + "text": "\"Jerry, Dottie, and their entire family are very relieved by and pleased with the court's decision,\" Amendola said in a written statement.", + "length": 138 + }, + { + "text": "On Monday, the judge denied a prosecution request that jurors be selected from outside the county where the former coach is being prosecuted.", + "length": 141 + }, + { + "text": "But another judge in a custody case involving the other three grandchildren should decide whether Sandusky can visit with them, Cleland ruled.", + "length": 142 + }, + { + "text": "The decision by Judge John Cleland eases some conditions of Jerry Sandusky's house arrest, which had forbidden contact with his 11 grandchildren.", + "length": 145 + }, + { + "text": "Cleland denied that request Monday, ruling that prosecutors did not present any evidence showing that Sandusky had tried to contact children at the school.", + "length": 155 + }, + { + "text": "\" Kelly also argued that Sandusky should be required to stay indoors during his house arrest because 
of fears among neighbors and teachers at a nearby elementary school.", + "length": 169 + }, + { + "text": "The allegations against Sandusky led to the firing of Penn State's heralded head football coach Joe Paterno only months before he died of complications from lung cancer.", + "length": 169 + }, + { + "text": "(CNN) -- The former Penn State assistant football coach currently awaiting trial on child sex assault charges can visit with some of his grandchildren, a judge ruled Monday.", + "length": 173 + }, + { + "text": "On Monday an attorney requested that the perjury charge against Curley be dropped, arguing that Paterno's death means prosecutors no longer have a required second witness to support the charge.", + "length": 193 + }, + { + "text": "The state \"failed to present any evidence whatsoever that (Sandusky) presents a clearly defined threat to any student at the adjoining elementary school simply by being on his deck,\" Monday's the ruling says.", + "length": 208 + }, + { + "text": "Tim Curley, Penn State's former athletic director, and Gary Schultz, a former university vice president who oversaw campus police, have been charged with perjury and failing to report an alleged 2002 sexual assault of a child.", + "length": 226 + }, + { + "text": "Cleland also ruled Monday that Sandusky would be allowed to have visits from adult friends and to leave his home for meetings with attorneys and private investigators aiding in his defense, provided that a probation coordinator approves.", + "length": 237 + }, + { + "text": "State Attorney General Linda Kelly had blasted Sandusky's request to see his grandchildren, saying in a motion earlier this month that Sandusky was fortunate to be granted house arrest when \"he is alleged to have committed 52 sexual offenses.", + "length": 242 + }, + { + "text": "\"Jerry is also happy he can now have visitation with long-time friends with the prior approval of the Probation Department and will be able to continue to use the deck 
to his home to exercise, care for and supervise his dog, Bo, when Bo is in the yard,\" Amendola said in his statement issued after Monday's ruling.", + "length": 314 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4813595116138458 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:38.871756103Z", + "first_section_created": "2025-12-23T09:36:38.872145919Z", + "last_section_published": "2025-12-23T09:36:38.872312526Z", + "all_results_received": "2025-12-23T09:36:38.936905531Z", + "output_generated": "2025-12-23T09:36:38.937046037Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:38.872145919Z", + "publish_time": "2025-12-23T09:36:38.872312526Z", + "first_worker_start": "2025-12-23T09:36:38.872872248Z", + "last_worker_end": "2025-12-23T09:36:38.934421Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:38.872888649Z", + "start_time": "2025-12-23T09:36:38.872967752Z", + "end_time": "2025-12-23T09:36:38.873035555Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:38.873077Z", + "start_time": "2025-12-23T09:36:38.873217Z", + "end_time": "2025-12-23T09:36:38.934421Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:38.872814846Z", + "start_time": "2025-12-23T09:36:38.872872248Z", + "end_time": "2025-12-23T09:36:38.872935051Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:38.872841447Z", + "start_time": "2025-12-23T09:36:38.87291105Z", + "end_time": "2025-12-23T09:36:38.872939951Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3308, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/007c85dc808dffa08286015365f9cb9695131bac.json b/data/output/007c85dc808dffa08286015365f9cb9695131bac.json new file mode 100644 index 0000000..4412bf8 --- /dev/null +++ b/data/output/007c85dc808dffa08286015365f9cb9695131bac.json @@ -0,0 +1,556 @@ +{ + "file_name": "007c85dc808dffa08286015365f9cb9695131bac.txt", + "total_words": 869, + "top_n_words": [ + { + "word": "the", + "count": 50 + }, + { + "word": "of", + "count": 33 + }, + { + "word": "in", + "count": 27 + }, + { + "word": "to", + "count": 26 + }, + { + "word": "on", + "count": 20 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "s", + "count": 16 + }, + { + "word": "and", + "count": 14 + }, + { + "word": "that", + "count": 14 + }, + 
{ + "word": "iraq", + "count": 11 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "'I .", + "length": 4 + }, + { + "text": "' U.", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "RAF .", + "length": 5 + }, + { + "text": "It is .", + "length": 7 + }, + { + "text": "'There .", + "length": 8 + }, + { + "text": "forces .", + "length": 8 + }, + { + "text": "It was .", + "length": 8 + }, + { + "text": "Tornado .", + "length": 9 + }, + { + "text": "Earlier, .", + "length": 10 + }, + { + "text": "Speaking .", + "length": 10 + }, + { + "text": "But the U.", + "length": 10 + }, + { + "text": "' Pledge: .", + "length": 11 + }, + { + "text": "Rhodes told .", + "length": 13 + }, + { + "text": "reintroducing U.", + "length": 16 + }, + { + "text": "reporters the U.", + "length": 16 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "forces in a combat role.", + "length": 24 + }, + { + "text": "air strikes against ISIS .", + "length": 26 + }, + { + "text": "civilian evacuation route.", + "length": 26 + }, + { + "text": "effort in Iraq is not over.", + "length": 27 + }, + { + "text": "A British official said a .", + "length": 28 + }, + { + "text": "possible deployment in Iraq.", + "length": 28 + }, + { + "text": "must come back from vacation.", + "length": 29 + }, + { + "text": "the crisis in Downing Street.", + "length": 29 + }, + { + "text": "into combat on the ground in Iraq.", + "length": 34 + }, + { + "text": "officials said Marines and special .", + "length": 36 + }, + { + "text": "fighters encircling the barren mountain.", + "length": 40 + }, + { + "text": "Sinjar – are they 'boots on the ground'?", + "length": 42 
+ }, + { + "text": "civilians trapped on the mountainside in the .", + "length": 46 + }, + { + "text": "' He insisted, though, that the deployment of .", + "length": 47 + }, + { + "text": "radio interviewer,' but when people are dying, you .", + "length": 52 + }, + { + "text": "That's because most of the people are no longer there.", + "length": 54 + }, + { + "text": "autonomous Kurdish region in the north of the country.", + "length": 54 + }, + { + "text": "C130 transporter aircraft have made several drops of .", + "length": 54 + }, + { + "text": "after the Cobra meeting, Cameron said 'detailed plans .", + "length": 55 + }, + { + "text": "President Obama is taking a two-week summer vacation there .", + "length": 60 + }, + { + "text": "understood the American team on the ground is also guiding U.", + "length": 61 + }, + { + "text": "Airdrops of food and water had sustained the refugees, he said.", + "length": 63 + }, + { + "text": "'exposed, starving and dying of thirst' on the barren mountain.", + "length": 63 + }, + { + "text": "reported earlier that Obama is considering military options to .", + "length": 64 + }, + { + "text": "thirst after being trapped on a mountain side in northern Iraq .", + "length": 64 + }, + { + "text": "Chinook helicopters were yesterday sent to the region ready for .", + "length": 65 + }, + { + "text": "hours assessing the military situation and the potential for a .", + "length": 65 + }, + { + "text": "President Barack Obama's deputy national security adviser, told .", + "length": 65 + }, + { + "text": "troops to aid in the rescue of refugees would be 'different than .", + "length": 66 + }, + { + "text": "needs to be a lasting solution that gets that population to a safe .", + "length": 68 + }, + { + "text": "know it is the holiday period in our Western countries,' he told a .", + "length": 68 + }, + { + "text": "reporters in Martha's Vineyard, where Obama is currently on vacation.", + "length": 69 + }, + { + "text": "David Cameron, 
speaking after chairing an emergency Cobra meeting in .", + "length": 70 + }, + { + "text": "space where they can receive more permanent assistance,' Ben Rhodes, .", + "length": 70 + }, + { + "text": "stranded Yazidis and that Britain would 'play a role in delivering it.", + "length": 70 + }, + { + "text": "rescue mission to save the tens of thousands of trapped refugees left .", + "length": 71 + }, + { + "text": "are now being put in place' for an international mission to rescue the .", + "length": 72 + }, + { + "text": "jets are due to carry out reconnaissance missions of the area, and four .", + "length": 73 + }, + { + "text": "Downing Street today, said tens of thousands of refugees risked dying of .", + "length": 74 + }, + { + "text": "forces flew in on black hawk helicopters, and departed after spending 24 .", + "length": 74 + }, + { + "text": "flying back from Portugal 12 hours early to chair an emergency meeting on .", + "length": 75 + }, + { + "text": "Prime Minister returned to the UK from holiday on Wednesday morning after .", + "length": 75 + }, + { + "text": "It is the first confirmation that international forces were on Mount Sinjar.", + "length": 76 + }, + { + "text": "government now says most of them have fled to safety and don't need rescuing.", + "length": 77 + }, + { + "text": "David Cameron had revealed Britain is working on plans for an international .", + "length": 77 + }, + { + "text": "Vacation: Obama risks looking aloof as Iraq burns, but he's sent 100 troops to Mt.", + "length": 82 + }, + { + "text": "forces are going to be redeployed in Iraq in a combat role to take the fight to [ISIS].", + "length": 87 + }, + { + "text": "Humanitarian mission: Iraqi Yazidi refugees gathering near a helicopter in northern Iraq .", + "length": 90 + }, + { + "text": "handful of SAS soldiers were also on hand to 'gather intelligence', The Guardian reported.", + "length": 90 + }, + { + "text": "would consider using ground troops, but reiterated that the 
president has ruled out 'reintroducing U.", + "length": 101 + }, + { + "text": "Pentagon spokesman Rear Admiral John Kirby said in a statement that 'there are far fewer Yazidis on Mt.", + "length": 103 + }, + { + "text": "' It's now unclear what such a mission would entail, since most of those in need of help have gone elsewhere.", + "length": 109 + }, + { + "text": "Sinjar than previously feared, in part because of humanitarian air drops [and] air strikes on [ISIS] targets.", + "length": 109 + }, + { + "text": "French Foreign Minister Laurent Fabius said Tuesday in France that world leaders should make the crisis a priority.", + "length": 115 + }, + { + "text": "Deputy Press Secretary Eric Schultz faces reporters in Edgartown, Massachusetts, on the island of Martha's Vineyard.", + "length": 116 + }, + { + "text": "Tens of thousands of people from the minority Yazidi population were trapped in the mountains of northern Iraq, but the U.", + "length": 122 + }, + { + "text": "' Also helpful, he said, were 'the efforts of the Peshmerga and the ability of thousands of Yazidis to evacuate from the mountain.", + "length": 130 + }, + { + "text": "' White House spokesman Eric Schultz also said during a press briefing on Wednesday that any rescue mission would not have an impact on whether 'U.", + "length": 147 + }, + { + "text": "Defense Secretary Chuck Hagel told the Associated Press that it's now far less likely that American military personnel will undertake any sort of rescue mission.", + "length": 161 + }, + { + "text": "help the refugees, who attempted to flee Iraq to avoid ISIS, a militant group that says it has established an 'Islamic state' consisting of parts of Syria and Iraq.", + "length": 164 + }, + { + "text": "Mr Cameron declined to give any details of the mission – such as whether Chinook helicopters (pictured) are being sent to the region could play a role in any evacuation .", + "length": 172 + }, + { + "text": "Thousands of members of the Yazidi religious 
minority remain stranded in the mountains outside the town of Sinjar, which the Islamic State group captured earlier this month .", + "length": 174 + }, + { + "text": "' Obama plans to return to the White House briefly this weekend, but has spent his time on the Massachusetts island golfing, beachcombing and dining – often with a retinue of fans in hot pursuit.", + "length": 197 + }, + { + "text": "British and American forces landed Wednesday on Iraq's Mount Sinjar, but plans to airlift as many as 30,000 starving and dying Yazidi refugees thought to be stranded there are not likely moving forward.", + "length": 202 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6130784749984741 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:39.373091624Z", + "first_section_created": "2025-12-23T09:36:39.373438938Z", + "last_section_published": "2025-12-23T09:36:39.373848654Z", + "all_results_received": "2025-12-23T09:36:39.451321579Z", + "output_generated": "2025-12-23T09:36:39.451493086Z", + "total_processing_time_ms": 78, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 77, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:39.373438938Z", + "publish_time": "2025-12-23T09:36:39.373677947Z", + "first_worker_start": "2025-12-23T09:36:39.374147666Z", + "last_worker_end": "2025-12-23T09:36:39.450466Z", + "total_journey_time_ms": 77, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:39.374116065Z", + "start_time": "2025-12-23T09:36:39.374177168Z", + "end_time": "2025-12-23T09:36:39.374276572Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:39.374382Z", + "start_time": "2025-12-23T09:36:39.374567Z", + "end_time": "2025-12-23T09:36:39.450466Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 75 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:39.374122865Z", + "start_time": "2025-12-23T09:36:39.374190668Z", + "end_time": "2025-12-23T09:36:39.374322273Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:39.374058463Z", + "start_time": "2025-12-23T09:36:39.374147666Z", + "end_time": "2025-12-23T09:36:39.374201169Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:39.373773551Z", + "publish_time": "2025-12-23T09:36:39.373848654Z", + "first_worker_start": "2025-12-23T09:36:39.374261771Z", + "last_worker_end": "2025-12-23T09:36:39.430286Z", + "total_journey_time_ms": 56, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:39.37422577Z", + "start_time": "2025-12-23T09:36:39.374261771Z", + "end_time": "2025-12-23T09:36:39.374269671Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:39.37443Z", + "start_time": "2025-12-23T09:36:39.374559Z", + "end_time": "2025-12-23T09:36:39.430286Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 55 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:39.37422687Z", + "start_time": "2025-12-23T09:36:39.374267971Z", + "end_time": "2025-12-23T09:36:39.374280472Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:39.374223869Z", + "start_time": "2025-12-23T09:36:39.374264571Z", + "end_time": "2025-12-23T09:36:39.374269171Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + 
"max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 130, + "min_processing_ms": 55, + "max_processing_ms": 75, + "avg_processing_ms": 65, + "median_processing_ms": 75, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2639, + "slowest_section_id": 0, + "slowest_section_time_ms": 77 + } +} diff --git a/data/output/007cad24a61d4f6293c8506232d1d7114d621267.json b/data/output/007cad24a61d4f6293c8506232d1d7114d621267.json new file mode 100644 index 0000000..1a99ab5 --- /dev/null +++ b/data/output/007cad24a61d4f6293c8506232d1d7114d621267.json @@ -0,0 +1,480 @@ +{ + "file_name": "007cad24a61d4f6293c8506232d1d7114d621267.txt", + "total_words": 1300, + "top_n_words": [ + { + "word": "the", + "count": 78 + }, + { + "word": "of", + "count": 45 + }, + { + "word": "and", + "count": 38 + }, + { + "word": "a", + "count": 23 + }, + { + "word": "in", + "count": 23 + }, + { + "word": "is", + "count": 22 + }, + { + "word": "fake", + "count": 21 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "market", + "count": 18 + }, + { + "word": "at", + "count": 17 + } + ], + "sorted_sentences": [ + { + "text": "7%).", + "length": 4 + }, + { + "text": "\" he asked.", + "length": 11 + }, + { + "text": "Market size .", + "length": 13 + }, + { + 
"text": "Alarming signs .", + "length": 16 + }, + { + "text": "\"There are two situations.", + "length": 26 + }, + { + "text": "All have a regular clientele.", + "length": 29 + }, + { + "text": "You can't make out the differences.", + "length": 35 + }, + { + "text": "8%) and tobacco at Rs 8,965 crore (15.", + "length": 38 + }, + { + "text": "\"We sell it to the retailers at Rs 55-60.", + "length": 41 + }, + { + "text": "6%), mobile phones at Rs 9,042 crore (20.", + "length": 41 + }, + { + "text": "9%), auto components at Rs 9,198 crore (29.", + "length": 43 + }, + { + "text": "The sales of industries are badly affected.", + "length": 43 + }, + { + "text": "\"Can you say that this is not an original bottle?", + "length": 49 + }, + { + "text": "4%), FMCG (personal goods) at Rs 15,035 crore (25.", + "length": 50 + }, + { + "text": "manufacturing dates and batch numbers on the packets.", + "length": 53 + }, + { + "text": "We have old and trusted traders from Delhi and outside.", + "length": 55 + }, + { + "text": "Nehru Place: Asia's biggest computer accessories market.", + "length": 56 + }, + { + "text": "\"This is causing losses to the government due to tax evasion.", + "length": 61 + }, + { + "text": "Sri Ram Khanna of Consumer Voice said counterfeiting is rampant.", + "length": 64 + }, + { + "text": "In this situation he gets cheated as he is paying for the original.", + "length": 67 + }, + { + "text": "In one case a consumer buys the fake item without being aware of it.", + "length": 68 + }, + { + "text": "Kashmere Market: One of the largest auto parts markets in the country.", + "length": 70 + }, + { + "text": "Kashmere Gate is one of the largest auto parts markets in the country.", + "length": 70 + }, + { + "text": "\"Heavy taxes on products have led to flourishing of the fake goods markets.", + "length": 75 + }, + { + "text": "Once consumed, the result can be dangerous in case of fake products,\" he added.", + "length": 79 + }, + { + "text": "Some shops sell 
fake branded auto spares at one fifth the original price or less.", + "length": 81 + }, + { + "text": "One can get all sorts of pirated software and fake hardware parts from this place .", + "length": 83 + }, + { + "text": "Of this Delhi alone contributes nearly 75 per cent to the production of fake goods.", + "length": 83 + }, + { + "text": "To avoid paying high taxes, people get trapped into buying cheaper products,\" he added.", + "length": 87 + }, + { + "text": "In the second case, there are counterfeit products which are available at lower prices.", + "length": 87 + }, + { + "text": "Some shops sell fake-branded auto spares at one fifth of the original price or even less .", + "length": 90 + }, + { + "text": "The highest loss in terms of revenue is from FMCG (packaged goods) at Rs 20,378 crore (23.", + "length": 90 + }, + { + "text": "The study further estimates an annual sales loss to industry of a whopping Rs 1,00,000 crore.", + "length": 93 + }, + { + "text": "\"Suppose, a mechanic tells you about the low quality auto part before fitting it in your vehicle.", + "length": 97 + }, + { + "text": "When asked about the packaging and appearance, he hesitatingly showed a bottle of a wellknown brand.", + "length": 100 + }, + { + "text": "One dealer who admitted to selling fake cosmetic items said: \"We don't sell these items to every trader.", + "length": 104 + }, + { + "text": "Here, you are not cheated as you are aware of this, but, the product is obviously of sub-standard quality.", + "length": 106 + }, + { + "text": "Rajput said excessive taxation is one of the main reasons behind flourishing of the business of fake items.", + "length": 107 + }, + { + "text": "Nehru Place is big computer accessories markets and one can get pirated software and fake hardware from there.", + "length": 110 + }, + { + "text": "\"In most cases, people are being cheated as they are not aware that the products they are buying are fake ones.", + "length": 111 + }, + { + "text": "It is 
also causing losses to the consumers as they are being cheated into purchasing substandard items,\" he added.", + "length": 114 + }, + { + "text": "And, the retailer will sell it to the customers as a genuine product at the maximum price or a bit less,\" he added.", + "length": 115 + }, + { + "text": "The sale of spurious products in these product categories has been estimated to be close to Rs 15,000 crore by 2013.", + "length": 116 + }, + { + "text": "According to a report by industry body Assocham, the current market size of counterfeit products is Rs 45,000 crore.", + "length": 116 + }, + { + "text": "Bhagirath Place Market: Wholesale market at Chandni Chowk where one can get fake and smuggled electronic appliances .", + "length": 117 + }, + { + "text": "\" On the price difference, he said, while a case of original talcum powder would Rs 120, the fake one costs Rs 50 or less.", + "length": 122 + }, + { + "text": "The industry body estimated that by the end of 2013, counterfeit goods market in India will be worth more than Rs 55,000 crore.", + "length": 127 + }, + { + "text": "Sadar Bazar is a wholesale market in Old Delhi where fake packaged FMCG goods and cosmetic items can be bought from select shops.", + "length": 129 + }, + { + "text": "Assocham's study showed that other segments that were hit by fake goods sale are vegetable oils, spices, ghee and watch components.", + "length": 131 + }, + { + "text": "A thriving organised market in counterfeit items, with Delhi at its centre, is driving an economy running into several thousand crores.", + "length": 135 + }, + { + "text": "The report indicates that besides being a big market for fake products, the Capital is the main transit point for the sale of such goods.", + "length": 137 + }, + { + "text": "\" The sale of counterfeit products is not only affecting consumers but it's also causing heavy losses to the government as well as the private sector.", + "length": 150 + }, + { + "text": "Gaffar Market has come 
to be known for counterfeit and smuggled mobile phones and accessories, cosmetic items and commonly used electronic appliances.", + "length": 150 + }, + { + "text": "Sadar Bazar: Wholesale market in Old Delhi where one can get all sorts of fake items, including packaged FMCG and cosmetic items from selected shops .", + "length": 150 + }, + { + "text": "Fake: A report by Assocham indicates that besides being a big market for fake products, the Capital is the main transit point for the sale of such goods .", + "length": 154 + }, + { + "text": "Last year Delhi Police raided a store in Central Delhi area, seizing a huge stock of counterfeit cosmetics, including those labelled as Revlon, Ponds and Dove.", + "length": 159 + }, + { + "text": "The police action unearthed a fake packaging factory, bringing the focus on the need for secure packaging and labelling for cosmetics and pharma items in India.", + "length": 160 + }, + { + "text": "\" Anil Rajput, chairman of Ficci-CASCADE, said that almost all packaged and non packaged items are being copied and Delhi is the centre of the illegal business.", + "length": 160 + }, + { + "text": "It also said the maximum loss for FMCG companies works out to 45 per cent, though on an average, it is around 25 per cent of the market share of well-known products.", + "length": 165 + }, + { + "text": "The key sectors which were included in the study were auto components, alcohol, computer hardware, FMCG (personal goods), FMCG (packaged goods), mobile phones and tobacco.", + "length": 171 + }, + { + "text": "They flourish because of lower costs but the investigation revealed that these goods find their way into the market and are often sold as genuine products at the MRP of a regular item.", + "length": 184 + }, + { + "text": "Anil Rajput, chairman, Ficci-CASCADE (Committee Against Smuggling and Counterfeiting Activities Destroying the Economy) said the business of fake items is a cause of concern for everybody.", + "length": 188 + }, + { + 
"text": "Some markets where these goods are easily available and sold openly like any legitimate business are Gaffar Market, Sadar Bazar, Khari Baoli, Bhagirath Place, Nehru Place and Kashmere Gate.", + "length": 189 + }, + { + "text": "Explaining the techniques adopted by the manufacturers, a police officer said: \"Nowaday's the manufactures situated in Outer Delhi and northeast Delhi areas have started using advanced machines.", + "length": 194 + }, + { + "text": "Similarly, a study by Assocham said Delhi accounts for 75 per cent of the rapidly growing counterfeit industry in the country and the current market size of the counterfeit products is Rs 45,000 crore.", + "length": 201 + }, + { + "text": "Gaffar Market: One of the largest markets in the country for counterfeit and smuggled products in the categories of mobile phones and accessories, cosmetic items and commonly used electronic appliances .", + "length": 203 + }, + { + "text": "Big companies like Philips India, Hindustan Unilever and Heinz India have raised an alarm on this front and their complaints saw separate cases being registered with the Economic Offences Wing (EOW) of Delhi Police last month.", + "length": 226 + }, + { + "text": "The maximum tax loss on account of smuggled and counterfeit products to the government is from the tobacco sector at Rs 6, 240 crore followed by FMCG (packaged food) at Rs 5,660 crore and FMCG (personal goods) at Rs 4,646 crore.", + "length": 228 + }, + { + "text": "The Ficci-CASCADE study on \"Socio-economic Impact of Counterfeiting, Smuggling and Tax Evasion in Seven Key Industry Sectors\" revealed that the estimated annual tax loss to the government in 2012 is estimated at Rs 26,190 crore.", + "length": 228 + }, + { + "text": "A Mail Today investigation has discovered that skillfully-run establishments sell anything from cosmetic products, packaged items of common use, electronic appliances, computer accessories, auto parts mobile phone accessories and more making the 
National Capital the hub of counterfeit goods.", + "length": 292 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.6527898460626602 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:39.874594351Z", + "first_section_created": "2025-12-23T09:36:39.876455926Z", + "last_section_published": "2025-12-23T09:36:39.876864143Z", + "all_results_received": "2025-12-23T09:36:39.953973953Z", + "output_generated": "2025-12-23T09:36:39.954194262Z", + "total_processing_time_ms": 79, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 77, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:39.876455926Z", + "publish_time": "2025-12-23T09:36:39.876705336Z", + "first_worker_start": "2025-12-23T09:36:39.877218257Z", + "last_worker_end": "2025-12-23T09:36:39.953198Z", + "total_journey_time_ms": 76, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:39.877122753Z", + "start_time": "2025-12-23T09:36:39.877218257Z", + "end_time": "2025-12-23T09:36:39.877425465Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:39.877457Z", + "start_time": "2025-12-23T09:36:39.877591Z", + "end_time": "2025-12-23T09:36:39.953198Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 75 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:39.877269859Z", + "start_time": "2025-12-23T09:36:39.877354762Z", + "end_time": "2025-12-23T09:36:39.877446866Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:39.877200356Z", + "start_time": "2025-12-23T09:36:39.877259358Z", + "end_time": "2025-12-23T09:36:39.877337462Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + 
"creation_time": "2025-12-23T09:36:39.876764339Z", + "publish_time": "2025-12-23T09:36:39.876864143Z", + "first_worker_start": "2025-12-23T09:36:39.877252958Z", + "last_worker_end": "2025-12-23T09:36:39.947778Z", + "total_journey_time_ms": 71, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:39.877442566Z", + "start_time": "2025-12-23T09:36:39.877513969Z", + "end_time": "2025-12-23T09:36:39.87755207Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:39.877629Z", + "start_time": "2025-12-23T09:36:39.877772Z", + "end_time": "2025-12-23T09:36:39.947778Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 70 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:39.877371263Z", + "start_time": "2025-12-23T09:36:39.877428065Z", + "end_time": "2025-12-23T09:36:39.877491768Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:39.877200356Z", + "start_time": "2025-12-23T09:36:39.877252958Z", + "end_time": "2025-12-23T09:36:39.877274759Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 145, + "min_processing_ms": 70, + "max_processing_ms": 75, + "avg_processing_ms": 72, + "median_processing_ms": 75, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3825, + "slowest_section_id": 0, + "slowest_section_time_ms": 76 + } +} diff --git a/data/output/007cc93d2ee0eeee6803cc3c3b4e47f7e2813c22.json b/data/output/007cc93d2ee0eeee6803cc3c3b4e47f7e2813c22.json new file mode 100644 index 0000000..64addbe --- /dev/null +++ b/data/output/007cc93d2ee0eeee6803cc3c3b4e47f7e2813c22.json @@ -0,0 +1,254 @@ +{ + "file_name": "007cc93d2ee0eeee6803cc3c3b4e47f7e2813c22.txt", + "total_words": 535, + "top_n_words": [ + { + "word": "the", + "count": 36 + }, + { + "word": "to", + "count": 17 + }, + { + "word": "bbc", + "count": 16 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "it", + "count": 13 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "is", + "count": 8 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "by", + "count": 6 + }, + { + "word": "about", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Alasdair Glennie .", + "length": 18 + }, + { + "text": "Pictured is BBC Media City .", + "length": 28 + }, + { + "text": "5billion licence fee revenue.", + "length": 29 + }, + { + "text": "5billion licence fee revenue .", + "length": 30 + }, + { + "text": "Last year, the NAO published two damning reports exposing BBC waste.", + "length": 68 + }, + { + "text": "And he said there are further delays because the BBC controls when his findings are published.", + "length": 94 + }, + { + "text": "The first found it blew £100million on a failed IT project known as the Digital Media Initiative.", + "length": 98 + }, + { + "text": "However, the BBC 
only provides access ‘by agreement’, meaning it is able to block investigators.", + "length": 100 + }, + { + "text": "He said it can be so hard to extract information from the corporation that he has trouble keeping his staff’s morale up.", + "length": 122 + }, + { + "text": "But Sir Amyas revealed the Digital Media Initiative report was delayed by eight months because the BBC blocked access to key documents.", + "length": 135 + }, + { + "text": "The watchdog’s director, Sir Amyas Morse, called for statutory powers to force BBC bosses to hand over evidence about how it uses its annual £3.", + "length": 147 + }, + { + "text": "’ The watchdog's director, Sir Amyas Morse, called for statutory powers to force BBC bosses to hand over evidence about how it uses its annual £3.", + "length": 149 + }, + { + "text": "He added: It can be difficult to keep motivation of my people going … when they have to wait a long time for information and it holds the report up.", + "length": 154 + }, + { + "text": "The second, which revealed that the corporation handed out £369million in severance pay-offs to its staff over eight years prompted parliamentary enquiries.", + "length": 157 + }, + { + "text": "A BBC spokesman said: 'We welcome Sir Amyas' positive comments about the management and governance of the BBC, our work on reducing costs and targets we've achieved.", + "length": 165 + }, + { + "text": "The National Audit Office (NAO) blamed BBC 'gatekeepers' for stopping it from viewing key documents about financial scandals, delaying one crucial report by eight months.", + "length": 170 + }, + { + "text": "The BBC is too secretive and has stopped official investigators from probing its huge pay deals and IT fiascos, the head of the Government spending watchdog said yesterday.", + "length": 172 + }, + { + "text": "The National Audit Office (NAO) blamed BBC ‘gatekeepers’ for stopping it from viewing key documents about financial scandals, delaying one crucial report by eight months.", + 
"length": 174 + }, + { + "text": "’ The NAO can probe the financial efficiency of all Government departments and agencies, as well as police and the NHS, and has legal powers to force them to reveal documents.", + "length": 177 + }, + { + "text": "‘There are an excessive number of gatekeepers in the organisation who feel they can apply their judgment rather than relying on our professionalism with regards to what we might see.", + "length": 184 + }, + { + "text": "Giving evidence to the Commons culture, media and sport committee, he said: ‘It is possible, particularly in difficult areas, to find the BBC doesn’t provide evidence where in their judgment it is covered by commercial or confidentiality grounds, or indeed privacy grounds.", + "length": 277 + }, + { + "text": "’ The chair of the culture, media and sport committee, Tory MP John Whittingdale, agreed with his criticism: ‘There is a perception that the BBC is an enormously bloated, bureaucratic organisation, with more people carrying out a function than would be the case in any of the equivalent commercial broadcasters.", + "length": 315 + }, + { + "text": "'We are committed to openness and transparency as the content of previous National Audit Office reports has shown and we have a good working relationship with the NAO which already has full access to the BBC's operations, with the exception of editorial areas, as protecting the BBC’s editorial independence is paramount.", + "length": 323 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8679473400115967 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:40.377615039Z", + "first_section_created": "2025-12-23T09:36:40.377957653Z", + "last_section_published": "2025-12-23T09:36:40.378178662Z", + "all_results_received": "2025-12-23T09:36:40.442000636Z", + "output_generated": "2025-12-23T09:36:40.442158243Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 
63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:40.377957653Z", + "publish_time": "2025-12-23T09:36:40.378178662Z", + "first_worker_start": "2025-12-23T09:36:40.378791087Z", + "last_worker_end": "2025-12-23T09:36:40.441078Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:40.378764086Z", + "start_time": "2025-12-23T09:36:40.378826188Z", + "end_time": "2025-12-23T09:36:40.378882891Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:40.378947Z", + "start_time": "2025-12-23T09:36:40.379133Z", + "end_time": "2025-12-23T09:36:40.441078Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:40.378732184Z", + "start_time": "2025-12-23T09:36:40.378791087Z", + "end_time": "2025-12-23T09:36:40.37886229Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:40.378731484Z", + "start_time": "2025-12-23T09:36:40.378800087Z", + "end_time": "2025-12-23T09:36:40.378829488Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + 
"total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3309, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/007d0061d750bd7fbeac518fec1f7e4e4defaf8c.json b/data/output/007d0061d750bd7fbeac518fec1f7e4e4defaf8c.json new file mode 100644 index 0000000..1066231 --- /dev/null +++ b/data/output/007d0061d750bd7fbeac518fec1f7e4e4defaf8c.json @@ -0,0 +1,322 @@ +{ + "file_name": "007d0061d750bd7fbeac518fec1f7e4e4defaf8c.txt", + "total_words": 575, + "top_n_words": [ + { + "word": "the", + "count": 38 + }, + { + "word": "flu", + "count": 14 + }, + { + "word": "in", + "count": 14 + }, + { + "word": "of", + "count": 13 + }, + { + "word": "to", + "count": 13 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "is", + "count": 9 + }, + { + "word": "s", + "count": 9 + }, + { + "word": "lydia", + "count": 8 + }, + { + "word": "season", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "com reads.", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": ",' said Bresee.", + "length": 15 + }, + { + "text": "According to NBCDFW.", + "length": 20 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "12:31 EST, 8 January 2014 .", + "length": 27 + }, + { + "text": "14:49 EST, 8 January 2014 .", + "length": 27 + }, + { + "text": "and threatens Americans nationwide.", + 
"length": 35 + }, + { + "text": "'There is still a lot of season to come.", + "length": 40 + }, + { + "text": "Lydia's circumstance is increasingly common.", + "length": 44 + }, + { + "text": "In South Carolina, nine have died since September.", + "length": 50 + }, + { + "text": "The color brown indicates widespread flu activity.", + "length": 50 + }, + { + "text": "'The virus is all around the United States right now.", + "length": 53 + }, + { + "text": "If folks haven't been vaccinated, we recommend they do it now.", + "length": 62 + }, + { + "text": "In Dallas County alone, 17 flu deaths have been reported since September .", + "length": 74 + }, + { + "text": "Joe Bresee of the Center for Disease Control's influenze division said Sunday.", + "length": 78 + }, + { + "text": "The southern state of North carolina had also reported 13 deaths by last week.", + "length": 78 + }, + { + "text": "Texas has been hard-hit this year as swine flu, or H1N1, pummels the Southern U.", + "length": 80 + }, + { + "text": "Elsewhere in Texas, 13 had been reported dead from the Houston area by last week.", + "length": 81 + }, + { + "text": "Flu season last each year from October to April and can peak in January or February.", + "length": 84 + }, + { + "text": "Rampant: The Southern United States are among the hardest hit this year by influenza.", + "length": 85 + }, + { + "text": "com, health officials in Dallas and Denton counties had by Tuesday confirmed 19 deaths.", + "length": 87 + }, + { + "text": "Lydia Kizziar's tragic death came as her home state of Texas saw an uptick in flu cases.", + "length": 88 + }, + { + "text": "' This season's virus has killed six children in the United States, according to CDC data.", + "length": 90 + }, + { + "text": "While the south appears to be suffering the most, everyone needs to remain mindful of the threat.", + "length": 97 + }, + { + "text": "Swine flu: The 2013-2014 flu vaccine accounts for various strains of the virus, including H1N1, or 
the swine flu .", + "length": 114 + }, + { + "text": "Even as reports of infection and death tolls hit a steep rise, experts warn that the season is still on an upslope.", + "length": 115 + }, + { + "text": "A memorial fund has been set up for Lydia Kizziar in order to help her family financially through this painful time.", + "length": 116 + }, + { + "text": "A 13-year-old Texas girl has died of influenza, taking this season's swine flu death toll to 17 in Dallas County alone.", + "length": 119 + }, + { + "text": "Tragic: 13-year-old Lydia Kizziar died Saturday, becoming the 17th confirmed person to die during this rampant flu season .", + "length": 123 + }, + { + "text": "Looking for help: A memorial fund has been set up for Lydia (right) to help her unemployed parents pay for the Texas girl's funeral .", + "length": 133 + }, + { + "text": "'They are unemployed, recently, and also their youngest daughter, Katie, is still in the hospital from the same illness that took their sweet Lydia.", + "length": 148 + }, + { + "text": "Eye-opening: CDC data suggests the H1N1 strain, which became so virulent in the 2009-2010 season, is the predominant flu virus being spread this year .", + "length": 151 + }, + { + "text": "Lydia Christine Kizziar of Carrollton was just a seventh grader when she passed away Saturday and her little sister is in the hospital fighting off the very same illness.", + "length": 170 + }, + { + "text": "'The Kizziar family is in need of financial blessings to help with the cost of care and funeral expenses for the passing of their sweet, young daughter, Lydia,' the page on youcaring.", + "length": 183 + }, + { + "text": "'It's too early to tell how severe it's going to be but we're still on the up slope of the flu season, so what we can expect is more flu, more intense disease and more deaths over the next few weeks,' Dr.", + "length": 204 + }, + { + "text": "While the very old and very young are usually the hardest hit by the flu, on Monday the 
Garland, Texas health department revealed that the two most recent mortalities were seen in patients under 50 with no other health conditions.", + "length": 230 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6893835067749023 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:40.878915758Z", + "first_section_created": "2025-12-23T09:36:40.879247972Z", + "last_section_published": "2025-12-23T09:36:40.87944468Z", + "all_results_received": "2025-12-23T09:36:40.948778976Z", + "output_generated": "2025-12-23T09:36:40.948943983Z", + "total_processing_time_ms": 70, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:40.879247972Z", + "publish_time": "2025-12-23T09:36:40.87944468Z", + "first_worker_start": "2025-12-23T09:36:40.879962401Z", + "last_worker_end": "2025-12-23T09:36:40.947814Z", + "total_journey_time_ms": 68, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:40.8799377Z", + "start_time": "2025-12-23T09:36:40.880011703Z", + "end_time": "2025-12-23T09:36:40.880098806Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:40.880058Z", + "start_time": "2025-12-23T09:36:40.880209Z", + "end_time": "2025-12-23T09:36:40.947814Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 67 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:40.8799513Z", + "start_time": "2025-12-23T09:36:40.880008202Z", + "end_time": "2025-12-23T09:36:40.880082705Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:40.879893898Z", + "start_time": "2025-12-23T09:36:40.879962401Z", + "end_time": "2025-12-23T09:36:40.879984202Z", + 
"queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 67, + "min_processing_ms": 67, + "max_processing_ms": 67, + "avg_processing_ms": 67, + "median_processing_ms": 67, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3249, + "slowest_section_id": 0, + "slowest_section_time_ms": 68 + } +} diff --git a/data/output/007d33fe39f1eed3f58561b7ed8abcc5deb2c5e8.json b/data/output/007d33fe39f1eed3f58561b7ed8abcc5deb2c5e8.json new file mode 100644 index 0000000..1c846cc --- /dev/null +++ b/data/output/007d33fe39f1eed3f58561b7ed8abcc5deb2c5e8.json @@ -0,0 +1,254 @@ +{ + "file_name": "007d33fe39f1eed3f58561b7ed8abcc5deb2c5e8.txt", + "total_words": 470, + "top_n_words": [ + { + "word": "the", + "count": 20 + }, + { + "word": "and", + "count": 17 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "in", + "count": 12 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "to", + "count": 9 + }, + { + "word": "swanson", + "count": 7 + }, + { + "word": "with", + "count": 7 + }, + { + "word": "mr", + "count": 6 + }, + 
{ + "word": "on", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Investigators .", + "length": 15 + }, + { + "text": "Sophie Jane Evans .", + "length": 19 + }, + { + "text": "a cult fan base of the rap-metal duo Insane Clown Posse.", + "length": 56 + }, + { + "text": "Sorry we are not currently accepting comments on this article.", + "length": 62 + }, + { + "text": "The cult fan base has been deemed a 'loosely organised gang' by the FBI.", + "length": 72 + }, + { + "text": "believe those involved in the incident may be affiliated with Juggalos, .", + "length": 73 + }, + { + "text": "Hurst, of Hebron, and Edwards, of Williamsburg, Virginia, were arrested on Monday.", + "length": 82 + }, + { + "text": "Rap duo: Insane Clown Posse, Shaggy 2 Dope, left, and Violent J, pose in their stage makeup in 1999 .", + "length": 101 + }, + { + "text": "Juggalos are fans of the rap-metal duo Insane Clown Posse or another Psychopathic Records hip hop group.", + "length": 104 + }, + { + "text": "Mr Swanson was taken to Peninsula Regional Medical Center and was later airlifted to a Baltimore burn center.", + "length": 109 + }, + { + "text": "Officials said Mr Swanson's tattoo was consistent with the cult, which is deemed a 'loosely organised gang' by the FBI.", + "length": 119 + }, + { + "text": "He was then reportedly left on the ground without medical treatment for several hours, before being driven to the hospital in Salisbury.", + "length": 136 + }, + { + "text": "Mr Lewis said Mr Swanson had been 'savagely beaten and nearly killed by two cohabitants' during the assault in the early hours of Monday morning.", + "length": 145 + }, + { + "text": "Juggalos and the artists they support hold a festival, dubbed 'The Gathering of the Juggalos', every year, which attracts more than 100,000 fans.", + "length": 145 + }, + { + "text": "He remains there in a critical condition with 'extensive' injuries, including severe burns, Wicomico 
County Sheriff Mike Lewis said in a statement.", + "length": 147 + }, + { + "text": "They then unsuccessfully tried to forcibly remove a tattoo from his arm, before dousing his arm with a flammable liquid and setting it alight, it is claimed.", + "length": 157 + }, + { + "text": "Two men have been charged with attempted murder after allegedly trying to carve a tattoo from their housemate's arm before setting him on fire, officials have said.", + "length": 164 + }, + { + "text": "Paul Hurst, 33, and Cary Edwards, 35, reportedly savagely beat up 31-year-old Zachary Swanson following an argument at their home in Hebron in Wicomico County, Maryland.", + "length": 169 + }, + { + "text": "They have each been charged with attempted first-degree and second-degree murder, first-degree and second-degree assault, reckless endangerment and use of a deadly weapon.", + "length": 171 + }, + { + "text": "The Wicomico Bureau of Investigation is investigating the incident, while detectives are executing a search warrant on the trio's house and the vehicle used to transport Mr Swanson to hospital.", + "length": 193 + }, + { + "text": "They have developed their own slang and characteristics, including drinking and spraying the soft drink Faygo, carrying hatchets, listening to 'horrorcore 'and wearing clown or corpse face paint.", + "length": 195 + }, + { + "text": "Scene: Hurst and Edwards reportedly 'savagely beat and nearly killed' Mr Swanson following an argument at their home in Rockawalkin Road (pictured) in Hebron, Maryland, in the early hours of Monday morning .", + "length": 207 + }, + { + "text": "Suspects: Paul Hurst (left), 33, and Cary Edwards (right), 35, have been charged with attempted murder after allegedly trying to carve a tattoo from 31-year-old Zachary Swanson's arm, before setting him on fire .", + "length": 212 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7059253454208374 + }, + "name_replaced": false, + "performance": { + 
"file_detection_time": "2025-12-23T09:36:41.380503489Z", + "first_section_created": "2025-12-23T09:36:41.3807833Z", + "last_section_published": "2025-12-23T09:36:41.380966308Z", + "all_results_received": "2025-12-23T09:36:41.449367967Z", + "output_generated": "2025-12-23T09:36:41.449533173Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 68, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:41.3807833Z", + "publish_time": "2025-12-23T09:36:41.380966308Z", + "first_worker_start": "2025-12-23T09:36:41.381421126Z", + "last_worker_end": "2025-12-23T09:36:41.448249Z", + "total_journey_time_ms": 67, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:41.381406425Z", + "start_time": "2025-12-23T09:36:41.381463628Z", + "end_time": "2025-12-23T09:36:41.38152123Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:41.381676Z", + "start_time": "2025-12-23T09:36:41.38182Z", + "end_time": "2025-12-23T09:36:41.448249Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 66 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:41.381446427Z", + "start_time": "2025-12-23T09:36:41.38150733Z", + "end_time": "2025-12-23T09:36:41.381571732Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:41.381373324Z", + "start_time": "2025-12-23T09:36:41.381421126Z", + "end_time": "2025-12-23T09:36:41.381441227Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 66, + "min_processing_ms": 66, + "max_processing_ms": 66, + "avg_processing_ms": 66, + "median_processing_ms": 66, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2880, + "slowest_section_id": 0, + "slowest_section_time_ms": 67 + } +} diff --git a/data/output/007d623bca383a2fc28355de2d293d16f51849bf.json b/data/output/007d623bca383a2fc28355de2d293d16f51849bf.json new file mode 100644 index 0000000..5a633f7 --- /dev/null +++ b/data/output/007d623bca383a2fc28355de2d293d16f51849bf.json @@ -0,0 +1,294 @@ +{ + "file_name": "007d623bca383a2fc28355de2d293d16f51849bf.txt", + "total_words": 815, + "top_n_words": [ + { + "word": "the", + "count": 34 + }, + { + "word": "to", + "count": 29 + }, + { + "word": "in", + "count": 22 + }, + { + "word": "a", + "count": 19 + }, + { + "word": "of", + "count": 17 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "i", + "count": 15 + }, + { + "word": "s", + "count": 13 + }, + { + "word": "he", + "count": 12 + }, + { + "word": "it", + "count": 12 + } + ], + "sorted_sentences": [ + { + "text": "We have to start well now.", + "length": 26 + }, + { + "text": "Bell is convinced he can deliver the goods.", + "length": 43 + }, + { + "text": "There are certain areas we’ve identified.", + "length": 43 + }, + { + "text": 
"But I probably didn’t expect to be in this position.", + "length": 54 + }, + { + "text": "England’s classiest Test batsman is clear on his role.", + "length": 56 + }, + { + "text": "‘I wouldn’t say I’ve changed anything dramatically.", + "length": 57 + }, + { + "text": "It would be good to do it against Australia and kick on from there.", + "length": 67 + }, + { + "text": "‘I’m someone with experience who can play in a variety of roles.", + "length": 68 + }, + { + "text": "‘I’ve tried to work hard even in Sri Lanka when I wasn’t playing.", + "length": 71 + }, + { + "text": "When you play for England you always try to get better and not stand still.", + "length": 75 + }, + { + "text": "‘It was disappointing but those opening 10 overs from Australia were world-class.", + "length": 83 + }, + { + "text": "’ Bell plays a shot past India's bowler Stuart Binny during their one-day clash .", + "length": 84 + }, + { + "text": "Ian Bell enjoys an England training session ahead of Friday's clash with Australia .", + "length": 84 + }, + { + "text": "I wouldn’t say that anything I did the other night or in Canberra was any different.", + "length": 86 + }, + { + "text": "This game is changing all the time and you have to go with that or you get left behind.", + "length": 87 + }, + { + "text": "‘I didn’t spend enough time at the crease to lose form,’ said Bell of his golden duck.", + "length": 92 + }, + { + "text": "’ Bell leaves the field after England's impressive victory over India in which he hit an unbeaten 88 .", + "length": 104 + }, + { + "text": "‘When I spoke to Eoin Morgan out here and he said he wanted me to open it was a massive confidence boost.", + "length": 107 + }, + { + "text": "Cook ran out of chances to show he was in good enough form to fulfil the brief so Bell has been able to step up.", + "length": 112 + }, + { + "text": "‘Moeen Ali has come out and had a really good go at the bowlers and I want to be similar to that,’ said Bell.", + "length": 113 + }, + { 
+ "text": "Looking back, my most successful time in one-day cricket came in the two years I opened so I wanted to get back to that.", + "length": 120 + }, + { + "text": "It’s just we’ve given a lot of thought over the last few months as to what we want to do here and it’s about executing that now.", + "length": 134 + }, + { + "text": "Shane Watson became the latest Australian to pull out through injury and he joins Mitchell Johnson, David Warner and George Bailey on the sidelines.", + "length": 148 + }, + { + "text": "‘I still thought I would get on the trip,’ insisted Bell as he reflected on his pre-Christmas plight ahead of Friday’s Tri-series game against Australia.", + "length": 159 + }, + { + "text": "‘Opening the batting in those first 10 overs suits my game, certainly in these conditions when there’s more bounce and you can play off the back foot or hit it over the field.", + "length": 179 + }, + { + "text": "Yet the coaches still had reservations about Alex Hales' technique and wanted to continue with their original game-plan of having a ‘proper’ batsman at the top of the order to try to bat through the 50 overs.", + "length": 212 + }, + { + "text": "‘Maybe it will be in a slightly different style, a bit more orthodox, but the important thing is recognising the surface you’re on and putting your foot down if it’s flat or getting through it if it’s doing a bit.", + "length": 221 + }, + { + "text": "Now after following up his 187 in a warm-up match in Canberra with a dreamy unbeaten 88 against India he can reflect on a dramatic switch in one-day fortunes that sees him with one final chance of making a World Cup impact.", + "length": 223 + }, + { + "text": "If England are going to make an impact in the World Cup then they will have to post scores of 300 plus with at least one batsman making the big century or even double hundred that are becoming common place in one-day cricket.", + "length": 225 + }, + { + "text": "It would be a huge boost for England if they 
could execute those plans against Australia at the Bellerive Oval and they will never have a better chance to end their dismal run of results here as the hosts are missing key players.", + "length": 229 + }, + { + "text": "’ His elevation, which was proposed by the England coaches and then endorsed by new captain Morgan, was interpreted by many as a reflection of the ‘old-fashioned’ tactics that had held England back in the limited-overs game.", + "length": 230 + }, + { + "text": "Bell was out first ball to the pace of Mitchell Starc when Australia won in Sydney at the start of this warm-up Tri-series and knows he has to succeed against them if he is to truly establish himself among the leading openers here.", + "length": 231 + }, + { + "text": "There has been no bigger beneficiary from Alastair Cook’s demise as England one-day captain than Ian Bell who has stepped up from the Sri Lankan sidelines to become a thoroughly modern World Cup opener in a couple of easy strides.", + "length": 232 + }, + { + "text": "Bell’s participation in the biggest tournament in one-day cricket was in doubt when he was left out after two matches of England’s tour of Sri Lanka but the late switch away from Cook gave him a lifeline he has gratefully seized.", + "length": 233 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5356857776641846 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:41.881794308Z", + "first_section_created": "2025-12-23T09:36:41.883656383Z", + "last_section_published": "2025-12-23T09:36:41.883862791Z", + "all_results_received": "2025-12-23T09:36:41.948411695Z", + "output_generated": "2025-12-23T09:36:41.948636804Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 64, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:41.883656383Z", + "publish_time": "2025-12-23T09:36:41.883862791Z", + 
"first_worker_start": "2025-12-23T09:36:41.884415213Z", + "last_worker_end": "2025-12-23T09:36:41.947446Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:41.884546419Z", + "start_time": "2025-12-23T09:36:41.884615721Z", + "end_time": "2025-12-23T09:36:41.884696625Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:41.884647Z", + "start_time": "2025-12-23T09:36:41.884806Z", + "end_time": "2025-12-23T09:36:41.947446Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:41.88434021Z", + "start_time": "2025-12-23T09:36:41.884415213Z", + "end_time": "2025-12-23T09:36:41.884534118Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:41.884408013Z", + "start_time": "2025-12-23T09:36:41.884513917Z", + "end_time": "2025-12-23T09:36:41.884559119Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + 
"wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4321, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/007d6b621501a4b4f307d5125786f08cfb717a11.json b/data/output/007d6b621501a4b4f307d5125786f08cfb717a11.json new file mode 100644 index 0000000..f619455 --- /dev/null +++ b/data/output/007d6b621501a4b4f307d5125786f08cfb717a11.json @@ -0,0 +1,318 @@ +{ + "file_name": "007d6b621501a4b4f307d5125786f08cfb717a11.txt", + "total_words": 808, + "top_n_words": [ + { + "word": "the", + "count": 57 + }, + { + "word": "of", + "count": 21 + }, + { + "word": "in", + "count": 18 + }, + { + "word": "texas", + "count": 16 + }, + { + "word": "s", + "count": 15 + }, + { + "word": "that", + "count": 15 + }, + { + "word": "a", + "count": 14 + }, + { + "word": "abortion", + "count": 13 + }, + { + "word": "law", + "count": 13 + }, + { + "word": "to", + "count": 13 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "The U.", + "length": 6 + }, + { + "text": "Thwarted: The U.", + "length": 16 + }, + { + "text": "Rick Perry last year.", + "length": 21 + }, + { + "text": "2 ruling by a panel of the New Orleans-based U.", + "length": 47 + }, + { + "text": "' Abbott's office said he would continue to defend the law.", + "length": 59 + }, + { + "text": "' Abortion opponents predicted they will ultimately prevail.", + "length": 60 + }, + { + "text": "That portion has already been upheld twice by the appeals court.", + "length": 64 + }, + { + "text": "The admitting privileges rule remains in effect elsewhere in Texas.", + "length": 67 + }, + { + 
"text": "1 - and the order from the Supreme Court means they are on hold again.", + "length": 70 + }, + { + "text": "'This is definitely a short-term loss, but not necessarily a long- term loss.", + "length": 77 + }, + { + "text": "Davis said she was 'thankful that women can continue to make their own personal decisions.", + "length": 90 + }, + { + "text": "Yeakel's ruling temporarily suspended the upgrade rules before they could go into effect Sept.", + "length": 94 + }, + { + "text": "And none was left along the Texas-Mexico border or outside any of the state's largest urban areas.", + "length": 98 + }, + { + "text": "Some other clinics had closed even earlier amid enforcement of the rule on admitting privileges at nearby hospitals.", + "length": 116 + }, + { + "text": "In an unsigned order, the justices sided with abortion rights advocates and health care providers in suspending an Oct.", + "length": 119 + }, + { + "text": "'We're relieved that the court stepped in to stop this, and we hope this dangerous law is ultimately overturned completely.", + "length": 123 + }, + { + "text": "Supreme Court on Tuesday blocked key parts of a 2013 law in Texas that had closed all but eight facilities providing abortions .", + "length": 128 + }, + { + "text": "The law's opponents note that leaves nearly a million Texas women embarking on drives longer than three hours to get an abortion.", + "length": 129 + }, + { + "text": "Hilltop Women's Reproductive Services in El Paso has been referring women who want abortions to another clinic it owns in New Mexico.", + "length": 133 + }, + { + "text": "District Judge Lee Yeakel, who had found that requiring hospital-style upgrades was less about safety than making access to abortion difficult.", + "length": 143 + }, + { + "text": "'This does not protect the health and safety of women who are undergoing abortion,' said Joe Pojman, executive director of Texas Alliance for Life.", + "length": 147 + }, + { + "text": "The office of Texas 
Attorney General Greg Abbott, a Republican who is the favorite in next month's governor's race, is leading the defense of the law.", + "length": 150 + }, + { + "text": "Until the nation's highest court intervened, only abortion facilities in the Houston, Austin, San Antonio and the Dallas-Fort Worth areas remained open.", + "length": 152 + }, + { + "text": "Even as it weighs the merits of the law, the appeals court had said it could be enforced - opening the door for the emergency appeal to the Supreme Court.", + "length": 154 + }, + { + "text": "5th Circuit Court of Appeals that Texas could immediately apply a rule making abortion clinics statewide spend millions of dollars on hospital-level upgrades.", + "length": 158 + }, + { + "text": "Gloria Martinez, Hilltop's administrative nurse, said she would call state officials Wednesday before deciding whether the clinic will resume performing abortions.", + "length": 163 + }, + { + "text": "Supreme Court on Tuesday blocked key parts of a 2013 law in Texas that had closed all but eight facilities providing abortions in America's second most-populous state.", + "length": 167 + }, + { + "text": "Attorneys for the state have denied that Texas women would be burdened by fewer abortion facilities, saying nearly 9 in 10 would still live within 150 miles of a provider.", + "length": 171 + }, + { + "text": "Allowing the rules on hospital-level upgrades to be enforced - including mandatory operating rooms and air filtration systems - shuttered more than a dozen clinics across Texas.", + "length": 177 + }, + { + "text": "The 5th Circuit is still considering the overall constitutionality of the sweeping measure overwhelmingly passed by the GOP-controlled Texas Legislature and signed into law by Gov.", + "length": 180 + }, + { + "text": "Democrat Wendy Davis launched her campaign for governor behind the celebrity she achieved through a nearly 13-hour filibuster last summer that temporarily blocked the law's passage.", + "length": 
181 + }, + { + "text": "The court also put on hold a provision of the law only as it applies to clinics in McAllen and El Paso that requires doctors at the facilities to have admitting privileges at nearby hospitals.", + "length": 192 + }, + { + "text": "Protesters on both sides of the abortion debate appeared in Austin, Texas in July 2013 (file photo) Justices Samuel Alito, Antonin Scalia and Clarence Thomas said they would have ruled against the clinics in all respects.", + "length": 221 + }, + { + "text": "'We're seeing the terrible impact these restrictions have on thousands of Texas women who effectively no longer have access to safe and legal abortion,' said Cecile Richards, president of Planned Parenthood Federation of America.", + "length": 229 + }, + { + "text": "The court also put on hold a provision of the law only as it applies to clinics in McAllen and El Paso that requires doctors at the facilities to have admitting privileges at nearby hospitals (file photo) The fight over the Texas law is the latest over tough new abortion restrictions that have been enacted across the country.", + "length": 327 + }, + { + "text": "' The fight over the Texas law is the latest over tough new abortion restrictions that have been enacted across the country (file photo) Stopped: The justices suspended an appeals court ruling that Texas could immediately apply a rule making abortion clinics statewide spend millions of dollars on hospital-level upgrades (file photo) The 5th Circuit decision had blocked an August ruling by Austin-based U.", + "length": 407 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6485568881034851 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:42.384648789Z", + "first_section_created": "2025-12-23T09:36:42.386007744Z", + "last_section_published": "2025-12-23T09:36:42.386204152Z", + "all_results_received": "2025-12-23T09:36:42.449241795Z", + "output_generated": 
"2025-12-23T09:36:42.449434402Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:42.386007744Z", + "publish_time": "2025-12-23T09:36:42.386204152Z", + "first_worker_start": "2025-12-23T09:36:42.38689678Z", + "last_worker_end": "2025-12-23T09:36:42.448245Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:42.386826877Z", + "start_time": "2025-12-23T09:36:42.38689678Z", + "end_time": "2025-12-23T09:36:42.386981283Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:42.387176Z", + "start_time": "2025-12-23T09:36:42.387336Z", + "end_time": "2025-12-23T09:36:42.448245Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:42.386824377Z", + "start_time": "2025-12-23T09:36:42.38690388Z", + "end_time": "2025-12-23T09:36:42.387000284Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:42.386820777Z", + "start_time": "2025-12-23T09:36:42.38689808Z", + "end_time": "2025-12-23T09:36:42.386933682Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + 
"total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4876, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/007dd168bc8ad32984e2e2430e5ddc974ed0a831.json b/data/output/007dd168bc8ad32984e2e2430e5ddc974ed0a831.json new file mode 100644 index 0000000..a2e2bb3 --- /dev/null +++ b/data/output/007dd168bc8ad32984e2e2430e5ddc974ed0a831.json @@ -0,0 +1,242 @@ +{ + "file_name": "007dd168bc8ad32984e2e2430e5ddc974ed0a831.txt", + "total_words": 325, + "top_n_words": [ + { + "word": "in", + "count": 14 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "the", + "count": 13 + }, + { + "word": "to", + "count": 11 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "guinea", + "count": 6 + }, + { + "word": "have", + "count": 5 + }, + { + "word": "is", + "count": 5 + }, + { + "word": "it", + "count": 5 + }, + { + "word": "people", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Mia De Graaf .", + "length": 14 + }, + { + "text": "Mohamed Ag Ayoya, said in a statement.", + "length": 38 + }, + { + "text": "Esther Sterk, MSF tropical medicine advisor, said.", + "length": 50 + }, + { + "text": "They are now stepping up units and work force in Guinea .", + "length": 57 + }, + { + "text": "The first cases of the haemorrhagic fever were spotted late last month.", + "length": 71 + }, + { + "text": "Symptoms include 
internal and external bleeding, vomiting and diarrhoea.", + "length": 72 + }, + { + "text": "It is spread by personal contact and kills 25-90 per cent of those infected.", + "length": 76 + }, + { + "text": "'Specialised staff are providing care to patients showing signs of infection.", + "length": 77 + }, + { + "text": "Officials have urged people to stay calm, wash their hands and report all cases to authorities.", + "length": 95 + }, + { + "text": "At least 59 people have died in Guinea following the outbreak of deadly flesh-eating virus Ebola.", + "length": 97 + }, + { + "text": "'Isolation units are essential to prevent the spread of the disease, which is highly contagious,' Dr.", + "length": 101 + }, + { + "text": "Based on evidence from cases in Uganda (pictured), health ministers have warned people to avoid contact .", + "length": 105 + }, + { + "text": "They have also warned people against eating wild meat, and offered free health care to anybody that detects symptoms.", + "length": 117 + }, + { + "text": "With no known cure or vaccine, health ministers have warned people to stay inside while they try to stop it spreading.", + "length": 118 + }, + { + "text": "It is also flying in 33 tons of medicines and equipment and setting up isolation units in the three affected areas in the country.", + "length": 130 + }, + { + "text": "Analysts suggest this is the first instance of Ebola in Guinea, as it is more commonly found in Uganda and Democratic Republic of Congo.", + "length": 136 + }, + { + "text": "'In Guinea, a country with a weak medical infrastructure, an outbreak like this can be devastating,' the Unicef representative in Guinea, Dr.", + "length": 141 + }, + { + "text": "Ebola was first spotted in Uganda and Democratic Republic of Congo, where Medicins Sans Frontieres workers pictured are pictured at an isolation unit.", + "length": 150 + }, + { + "text": "The international medical charity Medecins Sans Frontieres announced yesterday it was reinforcing 
its medical and logistics teams in Guinea in response to the epidemic.", + "length": 168 + }, + { + "text": "Unicef has prepositioned supplies and stepped up communication on the ground to sensitise medical staff and local populations on how to avoid contracting the illness, Agoya added.", + "length": 179 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8659000992774963 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:42.88696875Z", + "first_section_created": "2025-12-23T09:36:42.888660018Z", + "last_section_published": "2025-12-23T09:36:42.888856526Z", + "all_results_received": "2025-12-23T09:36:42.952855207Z", + "output_generated": "2025-12-23T09:36:42.952991612Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:42.888660018Z", + "publish_time": "2025-12-23T09:36:42.888856526Z", + "first_worker_start": "2025-12-23T09:36:42.889395847Z", + "last_worker_end": "2025-12-23T09:36:42.951968Z", + "total_journey_time_ms": 63, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:42.889357846Z", + "start_time": "2025-12-23T09:36:42.889395847Z", + "end_time": "2025-12-23T09:36:42.889440549Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:42.889597Z", + "start_time": "2025-12-23T09:36:42.889723Z", + "end_time": "2025-12-23T09:36:42.951968Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 62 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:42.889410248Z", + "start_time": "2025-12-23T09:36:42.889474751Z", + "end_time": "2025-12-23T09:36:42.889524153Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + 
"receive_time": "2025-12-23T09:36:42.889358246Z", + "start_time": "2025-12-23T09:36:42.889414148Z", + "end_time": "2025-12-23T09:36:42.889431249Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 62, + "min_processing_ms": 62, + "max_processing_ms": 62, + "avg_processing_ms": 62, + "median_processing_ms": 62, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2017, + "slowest_section_id": 0, + "slowest_section_time_ms": 63 + } +} diff --git a/data/output/007ddc6d5805b3c515bc2ca8843dcff28ec4903c.json b/data/output/007ddc6d5805b3c515bc2ca8843dcff28ec4903c.json new file mode 100644 index 0000000..bfb8656 --- /dev/null +++ b/data/output/007ddc6d5805b3c515bc2ca8843dcff28ec4903c.json @@ -0,0 +1,580 @@ +{ + "file_name": "007ddc6d5805b3c515bc2ca8843dcff28ec4903c.txt", + "total_words": 1231, + "top_n_words": [ + { + "word": "the", + "count": 60 + }, + { + "word": "in", + "count": 39 + }, + { + "word": "isis", + "count": 37 + }, + { + "word": "to", + "count": 37 + }, + { + "word": "and", + "count": 36 + }, + { + 
"word": "of", + "count": 33 + }, + { + "word": "a", + "count": 26 + }, + { + "word": "s", + "count": 23 + }, + { + "word": "said", + "count": 23 + }, + { + "word": "kobani", + "count": 15 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "N.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "U.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "S.", + "length": 2 + }, + { + "text": "The U.", + "length": 6 + }, + { + "text": "priority.", + "length": 9 + }, + { + "text": "official told CNN.", + "length": 18 + }, + { + "text": "And hours after U.", + "length": 18 + }, + { + "text": "Defense Department.", + "length": 19 + }, + { + "text": "Central Command said.", + "length": 21 + }, + { + "text": "Central Command said.", + "length": 21 + }, + { + "text": "military official said.", + "length": 23 + }, + { + "text": "In Syria, according to U.", + "length": 25 + }, + { + "text": "Violent protests in Turkey .", + "length": 28 + }, + { + "text": "Police then cordoned it off.", + "length": 28 + }, + { + "text": "How ISIS makes its millions .", + "length": 29 + }, + { + "text": "Death toll in fight for Kobani .", + "length": 32 + }, + { + "text": "The threat posed by ISIL is real.", + "length": 33 + }, + { + "text": "\"We do not take this step lightly.", + "length": 34 + }, + { + "text": "Central Command in Iraq and Syria.", + "length": 34 + }, + { + "text": "-led coalition airstrikes overnight.", + "length": 36 + }, + { + "text": "Dutch join in, Canada to follow suit .", + "length": 38 + }, + { + "text": "Danial Qassim said most were killed in U.", 
+ "length": 41 + }, + { + "text": "\" Near Kobani, airstrikes hit ISIS vehicles .", + "length": 45 + }, + { + "text": "special envoy for Syria, said in a statement.", + "length": 45 + }, + { + "text": "Why is ISIS so successful at luring Westerners?", + "length": 47 + }, + { + "text": "-- One south of Kobani destroyed an ISIS unit .", + "length": 47 + }, + { + "text": "-- Two southwest of Kobani damaged an ISIS tank .", + "length": 49 + }, + { + "text": "efforts, a senior administration official and a U.", + "length": 50 + }, + { + "text": "\"The international community needs to defend them.", + "length": 50 + }, + { + "text": "But they are now very close to not being able to do so.", + "length": 55 + }, + { + "text": "Belgium participated in overnight airstrikes in Iraq, U.", + "length": 56 + }, + { + "text": "\"They have been defending themselves with great courage.", + "length": 56 + }, + { + "text": "Central Command, the airstrikes against ISIS included: .", + "length": 56 + }, + { + "text": "There were another four strikes elsewhere in Syria and four in Iraq.", + "length": 68 + }, + { + "text": "If left unchecked this terrorist organization will grow and grow quickly.", + "length": 73 + }, + { + "text": "At least five Turkish police officers were among the injured, Anadolu said.", + "length": 75 + }, + { + "text": "The international community cannot sustain another city falling under ISIS.", + "length": 75 + }, + { + "text": "Mosul has also been overtaken by ISIS, which calls itself the Islamic State.", + "length": 76 + }, + { + "text": "CNN affiliate RTL Belgium said about 50 protesters stormed into the building.", + "length": 77 + }, + { + "text": "\"And we're going after those specific structures that I mentioned,\" Psaki added.", + "length": 80 + }, + { + "text": "-- One south of Kobani destroyed three ISIS armed vehicles and damaged another .", + "length": 80 + }, + { + "text": "It also detailed locations of targets and specified the costs of munitions 
used.", + "length": 80 + }, + { + "text": "In Belgium, meanwhile, Kurdish protesters stormed the European Parliament building.", + "length": 83 + }, + { + "text": "\" Five airstrikes targeting groups of ISIS fighters struck near Kobani overnight, U.", + "length": 84 + }, + { + "text": "\"To be absolutely clear, Canada's engagement in Iraq is not a ground combat mission.", + "length": 84 + }, + { + "text": "Some demonstrators set fire to a bus and garbage truck and smashed windows and cars.", + "length": 84 + }, + { + "text": "Tal Afar is about 70 kilometers (43 miles) west of Mosul -- Iraq's second-largest city.", + "length": 87 + }, + { + "text": "The vehicles were destroyed, and ISIS fighters may have been killed, the ministry said.", + "length": 87 + }, + { + "text": "strike force, in addition to other aircraft for surveillance, reconnaissance and refueling.", + "length": 91 + }, + { + "text": "Erdogan called for a no-fly zone, and for the arming of opposition groups in Iraq and Syria.", + "length": 92 + }, + { + "text": "-- One southeast of Kobani destroyed an ISIS armed vehicle carrying anti-aircraft artillery .", + "length": 93 + }, + { + "text": "The answer, CNN global affairs correspondent Elise Labott said, sounded like a resounding \"no.", + "length": 94 + }, + { + "text": "airstrikes targeting ISIS struck near Kobani overnight, the city's future was far from certain.", + "length": 95 + }, + { + "text": "Stopping ISIS from taking over cities, towns and other territory in Syria isn't the focus of U.", + "length": 95 + }, + { + "text": "The United States, Saudi Arabia, and the UAE all participated in the strikes, Central Command said.", + "length": 99 + }, + { + "text": "And two protesters died during demonstrations in the southeastern province of Siirt, Anadolu reported.", + "length": 102 + }, + { + "text": "Some European nations have joined the fight against ISIS, but the Kurdish protesters want tougher action.", + "length": 105 + }, + { + "text": "At a 
briefing, a State Department spokeswoman faced persistent questions over whether saving the city was a U.", + "length": 110 + }, + { + "text": "They are fighting with normal weapons, whereas the ISIS has got tanks and mortars,\" Staffan de Mistura, the U.", + "length": 110 + }, + { + "text": "Destroying ISIS will require ground operations, Erdogan said, according to the semi-official Anadolu news agency.", + "length": 113 + }, + { + "text": "Speaking to Syrian refugees, he said there has been \"no achievement yet,\" despite months of efforts against ISIS.", + "length": 113 + }, + { + "text": "Three people were killed and at least 36 injured in demonstrations throughout Turkey, police said, according to Anadolu.", + "length": 120 + }, + { + "text": "The group said it has documented the deaths of 219 ISIS jihadists, 163 members of the Kurdish militia, and 20 civilians.", + "length": 120 + }, + { + "text": "Outnumbered and outgunned by ISIS, local fighters trying to defend the Kurdish-dominated city have tried to flee into Turkey.", + "length": 125 + }, + { + "text": "military airstrikes against ISIS in Iraq and Syria have cost more than $62 million so far, according to data provided by the U.", + "length": 127 + }, + { + "text": "More than 400 people have been killed in the fight for Kobani since mid-September, the Syrian Observatory for Human Rights said.", + "length": 128 + }, + { + "text": "The data, apparently sent out inadvertently to the Pentagon's press contacts on Monday, listed the total number of airstrikes by U.", + "length": 131 + }, + { + "text": "There were clashes overnight in Istanbul, and a group of about 50 to 60 protesters blocked a road, CNN affiliate CNN Turk reported.", + "length": 131 + }, + { + "text": "\"Certainly no one wants to see Kobani fall, but our primary objective here is preventing (ISIS) from gaining a safe haven,\" she said.", + "length": 133 + }, + { + "text": "A northern Iraqi hospital has received the bodies of at least 29 
suspected ISIS militants, the head of the Tal Afar hospital said Tuesday.", + "length": 138 + }, + { + "text": "One protester was killed in the middle of a demonstration after being hit in the head by a gas canister in the town of Varto, police said.", + "length": 138 + }, + { + "text": "\"But we would not have taken the range of military strikes we have taken, including overnight, if we did not want to support and -- and defend the area.", + "length": 152 + }, + { + "text": "People upset over what they consider Turkey's failure to respond adequately to the ISIS threat launched protests in Turkey, some of which turned violent.", + "length": 153 + }, + { + "text": "Turkish President Recep Tayyip Erdogan warned that Kobani was about to fall to ISIS as protests raged in his country over how the group should be handled.", + "length": 154 + }, + { + "text": "Airstrikes against the radical Islamist group in Kobani can be challenging because many targets are too close to the Turkish border or Kurdish forces to strike, a senior U.", + "length": 172 + }, + { + "text": "\" Were Kobani to fall, ISIS would control a complete swath of land between its self-declared capital of Raqqa, Syria, and Turkey -- a stretch of more than 100 kilometers (62 miles).", + "length": 181 + }, + { + "text": "goal, she said, is \"a deliberate, well thought-out campaign in Syria\" to disrupt ISIS command and control, destroy the group's infrastructure and attack sources of fuel and financing for ISIS.", + "length": 192 + }, + { + "text": "\"Finally, they are hitting the right places,\" one local fighter against ISIS said after the airstrikes near Kobani, which is close to the Turkish border and key to ISIS' effort to extend its terrain.", + "length": 200 + }, + { + "text": "Kurdish fighters defending the key Syrian border city of Kobani are dangerously outmatched as ISIS advances, a top United Nations official said Tuesday, calling for the international community to step in.", + "length": 204 + }, 
+ { + "text": "Dutch forces participated for the first time in airstrikes against ISIS in Iraq as well, dropping three bombs on ISIS vehicles that were shooting at Kurdish Peshmerga forces, the Dutch Defense Ministry said in a statement.", + "length": 222 + }, + { + "text": "Elsewhere in Syria, two strikes west of al-Hasakah hit multiple ISIS buildings, one near Deir Ezzor struck an ISIS staging area and IED production facility, and one southwest of Rabiyah struck a small group of ISIS fighters.", + "length": 224 + }, + { + "text": "It includes a number of targeted measures, being taken with allies, to severely limit the ability of ISIL to engage in full scale military movements and to operate bases in the open,\" Prime Minister Stephen Harper said in a statement.", + "length": 234 + }, + { + "text": "\" \"It's obviously horrific to watch what's going on the ground, but it's important for the United States, for us to also step back and remember our strategic objective as it relates to our efforts and our engagement in Syria,\" spokeswoman Jen Psaki told reporters.", + "length": 264 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7398474216461182 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:43.389639024Z", + "first_section_created": "2025-12-23T09:36:43.390081242Z", + "last_section_published": "2025-12-23T09:36:43.390423056Z", + "all_results_received": "2025-12-23T09:36:43.469129231Z", + "output_generated": "2025-12-23T09:36:43.46936424Z", + "total_processing_time_ms": 79, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 78, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:43.390081242Z", + "publish_time": "2025-12-23T09:36:43.390297651Z", + "first_worker_start": "2025-12-23T09:36:43.39077397Z", + "last_worker_end": "2025-12-23T09:36:43.464439Z", + "total_journey_time_ms": 74, + "worker_timings": { + "sentencesort": { + 
"worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:43.390858474Z", + "start_time": "2025-12-23T09:36:43.390940077Z", + "end_time": "2025-12-23T09:36:43.39103018Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:43.391021Z", + "start_time": "2025-12-23T09:36:43.391212Z", + "end_time": "2025-12-23T09:36:43.464439Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 73 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:43.390805471Z", + "start_time": "2025-12-23T09:36:43.390873174Z", + "end_time": "2025-12-23T09:36:43.39101988Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:43.390694767Z", + "start_time": "2025-12-23T09:36:43.39077397Z", + "end_time": "2025-12-23T09:36:43.390862974Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:43.390347653Z", + "publish_time": "2025-12-23T09:36:43.390423056Z", + "first_worker_start": "2025-12-23T09:36:43.390869374Z", + "last_worker_end": "2025-12-23T09:36:43.468232Z", + "total_journey_time_ms": 77, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:43.390918976Z", + "start_time": "2025-12-23T09:36:43.390958178Z", + "end_time": "2025-12-23T09:36:43.391000379Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:43.391177Z", + "start_time": "2025-12-23T09:36:43.391311Z", + "end_time": "2025-12-23T09:36:43.468232Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 76 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:43.390978378Z", + "start_time": "2025-12-23T09:36:43.39101548Z", + "end_time": "2025-12-23T09:36:43.391074382Z", + "queue_wait_time_ms": 
0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:43.390828272Z", + "start_time": "2025-12-23T09:36:43.390869374Z", + "end_time": "2025-12-23T09:36:43.390901775Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 149, + "min_processing_ms": 73, + "max_processing_ms": 76, + "avg_processing_ms": 74, + "median_processing_ms": 76, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 3715, + "slowest_section_id": 1, + "slowest_section_time_ms": 77 + } +} diff --git a/data/output/007e2bc411fbe75ab5a536ba6c1304916b59dc66.json b/data/output/007e2bc411fbe75ab5a536ba6c1304916b59dc66.json new file mode 100644 index 0000000..b776a5c --- /dev/null +++ b/data/output/007e2bc411fbe75ab5a536ba6c1304916b59dc66.json @@ -0,0 +1,322 @@ +{ + "file_name": "007e2bc411fbe75ab5a536ba6c1304916b59dc66.txt", + "total_words": 546, + "top_n_words": [ + { + "word": "the", + "count": 23 + }, + { + "word": "her", + "count": 19 + }, + { + "word": "pippa", + "count": 17 + }, 
+ { + "word": "a", + "count": 15 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "she", + "count": 13 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "on", + "count": 10 + }, + { + "word": "s", + "count": 10 + }, + { + "word": "to", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "com.", + "length": 4 + }, + { + "text": "30am.", + "length": 5 + }, + { + "text": "30am .", + "length": 6 + }, + { + "text": "Perhaps .", + "length": 9 + }, + { + "text": "Calling Nico?", + "length": 13 + }, + { + "text": "pictured with her at Lou Lou's.", + "length": 31 + }, + { + "text": "bright pink Aztec-print trousers.", + "length": 33 + }, + { + "text": "print is graphic and eye catching.", + "length": 34 + }, + { + "text": "She wore it on a 34 inch gold chain.", + "length": 36 + }, + { + "text": "er choice of outfit proves she's a .", + "length": 36 + }, + { + "text": "Pippa couldn't be missed  in her baggy, .", + "length": 42 + }, + { + "text": "pants in their Autumn/Winter 13 collections.", + "length": 44 + }, + { + "text": "she was calling boyfriend Nico Jackson, who wasn't .", + "length": 52 + }, + { + "text": "Eye-catching: Pippa's pants are £295 from Temperley .", + "length": 54 + }, + { + "text": "Pippa was seen talking into her phone as she left the club .", + "length": 60 + }, + { + "text": "Pippa accessorised last night with a gold necklace from Kinnari.", + "length": 64 + }, + { + "text": "Good night out: Pippa Middleton beamed as she left Lou Lou's at 2.", + "length": 66 + }, + { + "text": "animatedly into on speaker so she didn't have to hold it to her ear.", + "length": 68 + }, + { + "text": "and Alexander McQueen just some of the designers including loud print .", + "length": 71 + }, + { + "text": "dedicated follower of fashion with Stella McCartney, Matthew Williamson .", + "length": 73 + }, + { + "text": "from the early 1900’s and made of soft crepe-de-chine, the ikat style .", + "length": 73 + }, + { + "text": "The designers say 
the trousers as are 'inspired by Ballet Russes costumes .", + "length": 75 + }, + { + "text": "On trend: Pippa wore bright pink graphic print trousers, £295 from Temperley .", + "length": 79 + }, + { + "text": "' Pippa teamed her trousers with black top, beige jacket and long gold necklace.", + "length": 80 + }, + { + "text": "Pippa's were from Temperley London, a favourite brand of her and her sister Kate.", + "length": 81 + }, + { + "text": "Pippa Middleton let her hair down as she enjoyed a night on the town with friends.", + "length": 82 + }, + { + "text": "Homeward bound: Pippa teamed her loud trousers with a beige jacket and black top .", + "length": 82 + }, + { + "text": "Letting her hair down: The Celebrate author was in high spirits as she caught a cab home .", + "length": 90 + }, + { + "text": "Partying with Pippa: The Waitrose columnist was out with friends including banker Tom Kingston .", + "length": 96 + }, + { + "text": "Worn out: Tired Pippa then rested her head on her friend's shoulder as the taxi took them home .", + "length": 96 + }, + { + "text": "She carried a clutch bag - a fashion accessory championed by her sister the Duchess of Cambridge.", + "length": 97 + }, + { + "text": "Pippa, who will become an aunty in July, was seen pulling out her mobile phone in the taxi and talking .", + "length": 104 + }, + { + "text": "Perhaps in a nod to her royal connection, the pants are called 'Sovereign Silk Trousers' and cost £295.", + "length": 104 + }, + { + "text": "So far she has advised readers on how to make sushi for dinner parties and how to have the perfect Provence picnic.", + "length": 115 + }, + { + "text": "Shoppers can design their own jewellery on the website by choosing what base, metal, size and gem stones they would like.", + "length": 121 + }, + { + "text": "' Following the publication of her book Celebrate, Pippa's latest project involves writing a column for Waitrose magazine.", + "length": 122 + }, + { + "text": "Iconic handbag 
designer Lulu Guinness said she was planning to make clutch bags smaller next season because of the 'Kate effect.", + "length": 128 + }, + { + "text": "Fortunately for Pippa, she had another handsome male to lean on, resting her head on banker Tom Kingston, 34, as they left in a taxi at at 2.", + "length": 142 + }, + { + "text": "The 29-year-old visited Lou Lou's in London's Mayfair, and appeared to be in high spirits despite the notable absence of her boyfriend Nico Jackson, 35.", + "length": 152 + }, + { + "text": "The private members' club is a favourite place to party for Pippa who smiled and giggled as she left the club last night to take a taxi home along with her companions.", + "length": 167 + }, + { + "text": "Pippa choose a weave egg medium basket pendant in 14ct gold vermeil with white topaz rough gemstones inside the basket and blue sapphires scattered on the outside, pictured right.", + "length": 179 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.41106775403022766 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:43.891381262Z", + "first_section_created": "2025-12-23T09:36:43.893463046Z", + "last_section_published": "2025-12-23T09:36:43.893629452Z", + "all_results_received": "2025-12-23T09:36:43.961172977Z", + "output_generated": "2025-12-23T09:36:43.961375585Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 67, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:43.893463046Z", + "publish_time": "2025-12-23T09:36:43.893629452Z", + "first_worker_start": "2025-12-23T09:36:43.894222176Z", + "last_worker_end": "2025-12-23T09:36:43.958823Z", + "total_journey_time_ms": 65, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:43.894187375Z", + "start_time": "2025-12-23T09:36:43.894255278Z", + "end_time": 
"2025-12-23T09:36:43.894341081Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:43.894487Z", + "start_time": "2025-12-23T09:36:43.894618Z", + "end_time": "2025-12-23T09:36:43.958823Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 64 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:43.894233877Z", + "start_time": "2025-12-23T09:36:43.894296479Z", + "end_time": "2025-12-23T09:36:43.894399683Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:43.894157074Z", + "start_time": "2025-12-23T09:36:43.894222176Z", + "end_time": "2025-12-23T09:36:43.894252578Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 64, + "min_processing_ms": 64, + "max_processing_ms": 64, + "avg_processing_ms": 64, + "median_processing_ms": 64, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3101, + 
"slowest_section_id": 0, + "slowest_section_time_ms": 65 + } +} diff --git a/data/output/007e3268f5e8f4c92cce2855e00e0ff93292cebe.json b/data/output/007e3268f5e8f4c92cce2855e00e0ff93292cebe.json new file mode 100644 index 0000000..678ea4d --- /dev/null +++ b/data/output/007e3268f5e8f4c92cce2855e00e0ff93292cebe.json @@ -0,0 +1,230 @@ +{ + "file_name": "007e3268f5e8f4c92cce2855e00e0ff93292cebe.txt", + "total_words": 348, + "top_n_words": [ + { + "word": "the", + "count": 17 + }, + { + "word": "in", + "count": 12 + }, + { + "word": "he", + "count": 11 + }, + { + "word": "brandis", + "count": 9 + }, + { + "word": "was", + "count": 8 + }, + { + "word": "his", + "count": 7 + }, + { + "word": "senator", + "count": 7 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "as", + "count": 6 + }, + { + "word": "hicks", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "What do you have to say?", + "length": 24 + }, + { + "text": "'He's run away,' he said.", + "length": 25 + }, + { + "text": "'It's too late – he's gone.", + "length": 30 + }, + { + "text": "' As it turned out, Senator Brandis had nothing to say.", + "length": 56 + }, + { + "text": "'All the children in detention will be released by the early months of next year.", + "length": 81 + }, + { + "text": "But just as he was finishing his address Hicks shouted: 'Hey, my name is David Hicks!", + "length": 85 + }, + { + "text": "Senator Brandis was the keynote speaker at a Human Rights Awards function in Sydney's CBD.", + "length": 90 + }, + { + "text": "Former Guantanamo Bay detainee David Hicks heckled Senator Brandis at the end of his speech .", + "length": 93 + }, + { + "text": "I was tortured for five-and-a-half years in Guantanamo Bay in the full knowledge of your party!", + "length": 95 + }, + { + "text": "Mr Hicks branded Senator Brandis a 'coward' for walking off stage and not answering his question .", + "length": 98 + }, + { + "text": "Previously in his speech he said that all asylum seeker 
children would be released from Christmas Island by early 2015.", + "length": 119 + }, + { + "text": "' The incident was a blot on the night for Senator Brandis who had earlier made some important announcements in his address.", + "length": 124 + }, + { + "text": "But as he walked off the stage at the Museum of Contemporary Arts, Mr Hicks told reporters he was a 'coward' for not answering his question.", + "length": 140 + }, + { + "text": "Federal Attorney-General George Brandis endured an uncomfortable night at Human Rights Awards function in Sydney's CBD when he was heckled .", + "length": 141 + }, + { + "text": "' Earlier in his speech Senator Brandis made it clear that all asylum seeker children would be released from Christmas Island by early 2015 .", + "length": 141 + }, + { + "text": "Senator Brandis also confirmed that the government would ask the Human Rights Commission to conduct a major inquiry into employment discrimination against older Australians and people with disabilities.", + "length": 202 + }, + { + "text": "Federal Attorney-General George Brandis did not get the the reaction he was hoping for at an awards ceremony in Sydney on Wednesday evening when he was heckled by former Guantanamo Bay detainee David Hicks.", + "length": 206 + }, + { + "text": "'Between now and Christmas, in other words, in the next two to three weeks, all of the children in detention on Christmas Island will be released from Christmas Island and will be returned to the community as soon as possible,' he said as the packed crowd applauded.", + "length": 266 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5744643211364746 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:44.39439645Z", + "first_section_created": "2025-12-23T09:36:44.394746865Z", + "last_section_published": "2025-12-23T09:36:44.394936172Z", + "all_results_received": "2025-12-23T09:36:44.463941756Z", + "output_generated": 
"2025-12-23T09:36:44.46406496Z", + "total_processing_time_ms": 69, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 69, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:44.394746865Z", + "publish_time": "2025-12-23T09:36:44.394936172Z", + "first_worker_start": "2025-12-23T09:36:44.39537389Z", + "last_worker_end": "2025-12-23T09:36:44.461347Z", + "total_journey_time_ms": 66, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:44.395461193Z", + "start_time": "2025-12-23T09:36:44.395524196Z", + "end_time": "2025-12-23T09:36:44.395564998Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:44.395702Z", + "start_time": "2025-12-23T09:36:44.39584Z", + "end_time": "2025-12-23T09:36:44.461347Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 65 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:44.395322888Z", + "start_time": "2025-12-23T09:36:44.39537389Z", + "end_time": "2025-12-23T09:36:44.395440393Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:44.39536779Z", + "start_time": "2025-12-23T09:36:44.395437992Z", + "end_time": "2025-12-23T09:36:44.395457593Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 65, + "min_processing_ms": 65, + "max_processing_ms": 65, + "avg_processing_ms": 65, + "median_processing_ms": 65, + 
"total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2030, + "slowest_section_id": 0, + "slowest_section_time_ms": 66 + } +} diff --git a/data/output/007e5c3851aaf9e71496092514b92b1b9ab6ba2a.json b/data/output/007e5c3851aaf9e71496092514b92b1b9ab6ba2a.json new file mode 100644 index 0000000..4535bc6 --- /dev/null +++ b/data/output/007e5c3851aaf9e71496092514b92b1b9ab6ba2a.json @@ -0,0 +1,298 @@ +{ + "file_name": "007e5c3851aaf9e71496092514b92b1b9ab6ba2a.txt", + "total_words": 557, + "top_n_words": [ + { + "word": "the", + "count": 38 + }, + { + "word": "to", + "count": 21 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "of", + "count": 13 + }, + { + "word": "clinton", + "count": 11 + }, + { + "word": "was", + "count": 11 + }, + { + "word": "family", + "count": 8 + }, + { + "word": "for", + "count": 8 + }, + { + "word": "have", + "count": 8 + }, + { + "word": "said", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "Why us?", + "length": 7 + }, + { + "text": "30am Wednesday .", + "length": 16 + }, + { + "text": "'We have no enemies.", + "length": 20 + }, + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "5 million Manhattan Beach Home .", + "length": 32 + }, + { + "text": "30am - other than the fact they're black.", + "length": 41 + }, + { + "text": "He said they had initially blamed the community.", + "length": 48 + }, + { + "text": "His wife, a corporate 
lawyer, was away on business.", + "length": 51 + }, + { + "text": "' Manhattan Beach's population is 84 percent white.", + "length": 51 + }, + { + "text": "Fire: A firebomb went off out front of the home about 2.", + "length": 56 + }, + { + "text": "He said he woke to the sound of at least two explosions.", + "length": 56 + }, + { + "text": "Mr Clinton, a pharmacist, was sleep when the attack occurred.", + "length": 61 + }, + { + "text": "'And I tell you- my gut tells me this was racially motivated.", + "length": 61 + }, + { + "text": "Vigil: Members of the community brought candles to the vigil .", + "length": 62 + }, + { + "text": "Upset: Mr Clinton said his 'gut' tells him they were targeted .", + "length": 63 + }, + { + "text": "' Damage: The firebomb caused $200,000 worth of damage to the $3.", + "length": 65 + }, + { + "text": "'And it was somebody that had the intent to harm, injure or even kill us.", + "length": 73 + }, + { + "text": "Thankful: Mrs Clinton, a lawyer, thanked the community for their support .", + "length": 74 + }, + { + "text": "He quickly got his three children up, grabbed the family dog and ran outside.", + "length": 77 + }, + { + "text": "However their attitude changed when they saw the overwhelming amount of support.", + "length": 80 + }, + { + "text": "Smashed: The family said they heard two explosions before realizing the house was on fire .", + "length": 91 + }, + { + "text": "Support: Hundreds turned out to show support for the Clinton family at a vigil on Friday night .", + "length": 96 + }, + { + "text": "Targeted: Ronald and Malissia Clinton believe their family was targeted as part of a hate crime .", + "length": 98 + }, + { + "text": "Victim Ronald Clinton (left), pictured with his family, said he was amazed at the turnout for the service .", + "length": 107 + }, + { + "text": "Members of the all came together for a vigil on Friday night as Ronald insisted the attack was racially-motivated.", + "length": 114 + }, + { + "text": 
"'When it happens to one of us, it happens to us all,' Clinton said at the vigil -causing the crowd to erupt in cheers.", + "length": 118 + }, + { + "text": "He then used a garden hose to douse the flames at the front of the house until firefighters arrived to finish the job.", + "length": 118 + }, + { + "text": "' After the fire, Mr Clinton told My Fox LA: 'I don't have proof, I don't have any type of motive, but I do have a gut,'.", + "length": 123 + }, + { + "text": "Young and old: Families were among those who held up candles to show their support for the family who have decide to stay in the community .", + "length": 140 + }, + { + "text": "Ronald and Malissia Clinton have chosen to stay in their Manhattan Beach, California, home after being inundated with support from the local community.", + "length": 151 + }, + { + "text": "The couple believe there is no other reason for the random act  - where a gasoline-filled tire was set alight, blowing out the front of their house at 2.", + "length": 154 + }, + { + "text": "Hundreds of people from a mostly white neighborhood have gathered to support an African-American's family whose home was firebombed in a suspected hate crime.", + "length": 158 + }, + { + "text": "Mr Clinton added: 'I have to admit, initially, we considered it, but you know, this community is just too amazing for us to let one individual force us to leave.", + "length": 161 + }, + { + "text": "Solidarity: Hundreds of people from the mostly white Manhattan Beach, California, neighborhood gathered to support an African-American's family whose home was firebombed in a suspected hate crime .", + "length": 197 + }, + { + "text": "Justice: After the vigil in the city Manhattan Beach Mayor Wayne Powell said: 'This is not what our community is all about and we're going to get the person who did this' Adding to The Daily Breeze, Mr Clinton said: 'I just know there is no other reason to target us.", + "length": 268 + } + ], + "sentiment": { + "sentiment": 
"negative", + "score": 0.7938249707221985 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:44.895726371Z", + "first_section_created": "2025-12-23T09:36:44.897169729Z", + "last_section_published": "2025-12-23T09:36:44.897366237Z", + "all_results_received": "2025-12-23T09:36:44.956002002Z", + "output_generated": "2025-12-23T09:36:44.95618191Z", + "total_processing_time_ms": 60, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 58, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:44.897169729Z", + "publish_time": "2025-12-23T09:36:44.897366237Z", + "first_worker_start": "2025-12-23T09:36:44.898061565Z", + "last_worker_end": "2025-12-23T09:36:44.955174Z", + "total_journey_time_ms": 58, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:44.898031264Z", + "start_time": "2025-12-23T09:36:44.898095767Z", + "end_time": "2025-12-23T09:36:44.898158069Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:44.898273Z", + "start_time": "2025-12-23T09:36:44.898395Z", + "end_time": "2025-12-23T09:36:44.955174Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 56 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:44.898084166Z", + "start_time": "2025-12-23T09:36:44.898144969Z", + "end_time": "2025-12-23T09:36:44.898216772Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:44.897999563Z", + "start_time": "2025-12-23T09:36:44.898061565Z", + "end_time": "2025-12-23T09:36:44.898090367Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + 
"min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 56, + "min_processing_ms": 56, + "max_processing_ms": 56, + "avg_processing_ms": 56, + "median_processing_ms": 56, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3139, + "slowest_section_id": 0, + "slowest_section_time_ms": 58 + } +} diff --git a/data/output/007e80b8bde5a78657a3a030602938462b58021c.json b/data/output/007e80b8bde5a78657a3a030602938462b58021c.json new file mode 100644 index 0000000..3884d1c --- /dev/null +++ b/data/output/007e80b8bde5a78657a3a030602938462b58021c.json @@ -0,0 +1,298 @@ +{ + "file_name": "007e80b8bde5a78657a3a030602938462b58021c.txt", + "total_words": 570, + "top_n_words": [ + { + "word": "the", + "count": 31 + }, + { + "word": "of", + "count": 16 + }, + { + "word": "a", + "count": 12 + }, + { + "word": "on", + "count": 11 + }, + { + "word": "at", + "count": 10 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "to", + "count": 9 + }, + { + "word": "in", + "count": 8 + }, + { + "word": "news", + "count": 8 + }, + { + "word": "for", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Hey kids.", + "length": 9 + }, + { + "text": "£Leveson.", + "length": 10 + 
}, + { + "text": "£mediateesside.", + "length": 16 + }, + { + "text": "Daily Mail Reporter .", + "length": 21 + }, + { + "text": "They the rules, stick to them.", + "length": 30 + }, + { + "text": "A Scotland Yard spokesman said: ‘At 6.", + "length": 40 + }, + { + "text": "’ Detectives investigating phone hacking .", + "length": 44 + }, + { + "text": "Last updated at 6:38 PM on 30th November 2011 .", + "length": 47 + }, + { + "text": "The woman is now in custody at a police station in Northumbria.", + "length": 63 + }, + { + "text": "International chief executive Rebekah Brooks (left) and ex-Downing .", + "length": 68 + }, + { + "text": "have arrested a series of high-profile figures, including former News .", + "length": 71 + }, + { + "text": "Bethany Usher worked for News Of The World and rival paper The People .", + "length": 71 + }, + { + "text": "‘It would be inappropriate to discuss any further details at this time.", + "length": 73 + }, + { + "text": "One tweet yesterday read: ‘For god sake Paul McMullen, shut your sickening trap.", + "length": 82 + }, + { + "text": "A university lecturer was arrested today over phone-hacking at the News of the World.", + "length": 85 + }, + { + "text": "35am officers arrested the woman on suspicion of conspiracy to intercept voicemail messages...", + "length": 94 + }, + { + "text": "’ Ms Usher has referred several times to the Leveson Inquiry on her Twitter page, @bethanyusher.", + "length": 98 + }, + { + "text": "’ Another, posted on Monday, said: ‘Am I the only former tabloid reporter who followed the £PCC?", + "length": 101 + }, + { + "text": "Scotland Yard’s phone-hacking squad is working its way through 300 million emails from News International.", + "length": 108 + }, + { + "text": "Arrested: Former News of the World journailst Bethany Usher is under police custody over hacking allegations .", + "length": 110 + }, + { + "text": "Ms Usher studied at Leeds University before getting her first job as a trainee reporter on 
the Sunderland Echo.", + "length": 111 + }, + { + "text": "’ Regarding her arrest, a spokesman for the university said: ‘We cannot comment on an ongoing investigation.", + "length": 112 + }, + { + "text": "’ Ms Usher, who worked in the newspaper industry for seven years, is currently a senior journalism lecturer at Teesside University.", + "length": 133 + }, + { + "text": "The arrest came as Labour MP Peter Hain met detectives over claims that his computer was hacked while he was Northern Ireland Secretary.", + "length": 136 + }, + { + "text": "According to her biography on the university’s website: ‘Bethany has won four awards and was named young journalist of the year in 2003.", + "length": 140 + }, + { + "text": "Street communications chief Andy Coulson (right) She is the first Weeting arrest since sports journalist Raoul Simons, 35, was held in September.", + "length": 145 + }, + { + "text": "New Commissioner Bernard Hogan-Howe says police have already spent up to £3million on salaries, with officers speaking to 1,800 of 6,000 potential victims.", + "length": 156 + }, + { + "text": "Bethany Usher, who worked at the axed Sunday tabloid and its former rival The People, is being questioned in custody at a police station in Northumbria, sources said.", + "length": 166 + }, + { + "text": "A total of 120 officers and staff are now working on the entire investigation after 1,800 people came forward to express fears that they may have been victims of hacking.", + "length": 170 + }, + { + "text": "Detectives have arrested a series of high-profile figures, including former News International chief executive Rebekah Brooks and ex-Downing Street communications chief Andy Coulson.", + "length": 182 + }, + { + "text": "Media lawyer Charlotte Harris, of Mishcon de Reya, will act on Mr Hain’s behalf for the case, which comes under the force’s Operation Tuleta, running alongside the Weeting phone-hacking probe.", + "length": 196 + }, + { + "text": "Police are examining evidence 
suggesting that his files, as well as those of senior civil servants and intelligence agents, were targeted by private detectives who may have been working for News International.", + "length": 209 + }, + { + "text": "The 31-year-old - who was questioned by officers in 2006 on suspicion of providing false information for a job at Buckingham Palace but not charged - becomes the 17th arrest under Scotland Yard’s Operation Weeting.", + "length": 216 + }, + { + "text": "The scandal has already led to the closure of the News of the World after 168 years, prompted a major public inquiry and forced the resignation of Metropolitan Police Commissioner Sir Paul Stephenson and Assistant Commissioner John Yates.", + "length": 238 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.7530368566513062 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:45.398166137Z", + "first_section_created": "2025-12-23T09:36:45.399524492Z", + "last_section_published": "2025-12-23T09:36:45.399712399Z", + "all_results_received": "2025-12-23T09:36:45.46494113Z", + "output_generated": "2025-12-23T09:36:45.465126438Z", + "total_processing_time_ms": 66, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:45.399524492Z", + "publish_time": "2025-12-23T09:36:45.399712399Z", + "first_worker_start": "2025-12-23T09:36:45.400169018Z", + "last_worker_end": "2025-12-23T09:36:45.463931Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:45.400124516Z", + "start_time": "2025-12-23T09:36:45.400182518Z", + "end_time": "2025-12-23T09:36:45.400283122Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:45.400376Z", + "start_time": 
"2025-12-23T09:36:45.400517Z", + "end_time": "2025-12-23T09:36:45.463931Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:45.400110515Z", + "start_time": "2025-12-23T09:36:45.400174818Z", + "end_time": "2025-12-23T09:36:45.400248721Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:45.400107715Z", + "start_time": "2025-12-23T09:36:45.400169018Z", + "end_time": "2025-12-23T09:36:45.400206719Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3583, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/007ebc8b8bc2f38156250cf0a7cca4f549159720.json b/data/output/007ebc8b8bc2f38156250cf0a7cca4f549159720.json new file mode 100644 
index 0000000..aca03d5 --- /dev/null +++ b/data/output/007ebc8b8bc2f38156250cf0a7cca4f549159720.json @@ -0,0 +1,388 @@ +{ + "file_name": "007ebc8b8bc2f38156250cf0a7cca4f549159720.txt", + "total_words": 814, + "top_n_words": [ + { + "word": "the", + "count": 76 + }, + { + "word": "of", + "count": 35 + }, + { + "word": "to", + "count": 30 + }, + { + "word": "and", + "count": 25 + }, + { + "word": "a", + "count": 17 + }, + { + "word": "press", + "count": 14 + }, + { + "word": "by", + "count": 11 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "that", + "count": 11 + }, + { + "word": "charter", + "count": 10 + } + ], + "sorted_sentences": [ + { + "text": "| .", + "length": 3 + }, + { + "text": "By .", + "length": 4 + }, + { + "text": "The .", + "length": 5 + }, + { + "text": "However, .", + "length": 10 + }, + { + "text": "UPDATED: .", + "length": 10 + }, + { + "text": "PUBLISHED: .", + "length": 12 + }, + { + "text": "Press freedom.", + "length": 14 + }, + { + "text": "James Chapman .", + "length": 15 + }, + { + "text": "18:24 EST, 24 October 2013 .", + "length": 28 + }, + { + "text": "18:24 EST, 24 October 2013 .", + "length": 28 + }, + { + "text": "protects freedom of expression.", + "length": 31 + }, + { + "text": "The Spectator magazine has already .", + "length": 36 + }, + { + "text": "punitive costs in libel and privacy cases.", + "length": 42 + }, + { + "text": "politicians’ charter and take its chances in the courts.", + "length": 58 + }, + { + "text": "senior figures including Lord Lester, an eminent QC who is the .", + "length": 64 + }, + { + "text": "In America they wouldn’t dream of doing it, so why do it here?", + "length": 64 + }, + { + "text": "architect of reforms to the libel laws, have suggested that would .", + "length": 67 + }, + { + "text": "should have applied the most rigorous standards of consultation and .", + "length": 69 + }, + { + "text": "announced it will refuse to take part in regulation overseen by the .", + "length": 69 + }, 
+ { + "text": "violate article 10 of the European Convention on Human Rights, which .", + "length": 70 + }, + { + "text": "to join a new regulator will be hit with ‘exemplary’ damages and .", + "length": 70 + }, + { + "text": "Government has passed legislation meaning that newspapers that refuse .", + "length": 71 + }, + { + "text": "examination of the Royal Charter proposed by the industry, which would .", + "length": 72 + }, + { + "text": "’ The Mayor of London Boris Johnson backed the move by the newspapers.", + "length": 72 + }, + { + "text": "have enshrined tough regulatory standards at the same time as protecting .", + "length": 74 + }, + { + "text": "He told the Mail: ‘The state has no business trying to regulate the content of newspapers.", + "length": 92 + }, + { + "text": "Industry bodies, through the Press Standards Board of Finance, will apply to the High Court for a judicial review .", + "length": 115 + }, + { + "text": "Mirror plc, said after nine months of work and consultation a ‘tough, independent and effective regulator’ was in sight.", + "length": 124 + }, + { + "text": "Politicians agreed the detail of their own charter designed to oversee newspaper regulation in the wake of the Leveson Inquiry .", + "length": 128 + }, + { + "text": "They are to argue that the Government’s handling of a rival Royal Charter proposed by the newspaper and magazine industry was unlawful.", + "length": 137 + }, + { + "text": "In a separate announcement, industry representatives announced a final set of plans to set up a new Independent Press Standards Organisation.", + "length": 141 + }, + { + "text": "The warning was signed by the World Press Freedom Committee, the International Press Institute and the Commonwealth Press Union among others.", + "length": 141 + }, + { + "text": "It is thought to be the first time the Queen has been asked to sign a Royal Charter imposing a system of regulation on an unwilling industry.", + "length": 141 + }, + { + "text": "‘They 
singularly failed to do so, and that is why – as the issues at stake are so extraordinarily high – we are having to take this course of action.", + "length": 155 + }, + { + "text": "Newspaper bosses last night announced plans to mount an extraordinary legal challenge to plans to impose on the Press a Royal Charter written by politicians.", + "length": 157 + }, + { + "text": "Planning minister Nick Boles suggested newspapers should refuse to cooperate and mount a legal challenge against the way the Government has handled the issue .", + "length": 159 + }, + { + "text": "Industry bodies – through the Press Standards Board of Finance, which funds the existing regulatory system – are to apply to the High Court for judicial review.", + "length": 164 + }, + { + "text": "Earlier this month, politicians agreed the detail of their own charter designed to oversee newspaper regulation in the wake of the Leveson Inquiry into media standards.", + "length": 168 + }, + { + "text": "‘A free press is the bedrock of democracy and any Royal Charter needs to be fully and properly consulted on – anything less risks  outcomes that threaten those freedoms.", + "length": 174 + }, + { + "text": "’ The industry will argue that despite repeated requests, the Privy Council refused to discuss what criteria they used to judge its charter until after it had decided to reject it.", + "length": 182 + }, + { + "text": "Lord Black of Brentwood, chairman of the Press Standards Board of Finance, said the Government's decision has 'enormous ramifications for free speech' ‘The Government and the Privy Council .", + "length": 192 + }, + { + "text": "One close ally of the Prime Minister, planning minister Nick Boles, suggested newspapers should refuse to cooperate and mount a legal challenge against the way the Government has handled the issue.", + "length": 197 + }, + { + "text": "It claims that the application was not dealt with fairly, that the press had a right to be consulted which the Government and 
the Privy Council failed to do, and that the procedures deployed were ‘irrational’.", + "length": 213 + }, + { + "text": "This week, an international coalition of free speech groups warned that the ‘repressive’ plans to regulate Britain’s 300-year-old free press will undermine the country’s international standing and the reputation of the Queen.", + "length": 233 + }, + { + "text": "They will argue a decision by the Privy Council, a secretive body of ministers which advises the Queen, to reject an industry-backed charter in favour of one agreed behind closed doors by the three main political parties, should be quashed.", + "length": 240 + }, + { + "text": "Lord Black of Brentwood, chairman of the Press Standards Board of Finance, said: ‘The decision by the Government and the Privy Council on this matter has enormous ramifications for free speech both here in the UK, and – because of our leadership role in the Commonwealth and developing world – across the globe.", + "length": 317 + }, + { + "text": "An alternative proposal put forward by the Press, which would have meant a new independent regulator having strong investigative powers and the right to impose fines of up to £1million for wrongdoing, up-front corrections, with inaccuracies corrected fully and prominently, and independence from the industry and politicians, was rejected.", + "length": 340 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5575922131538391 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:45.900005978Z", + "first_section_created": "2025-12-23T09:36:45.900398594Z", + "last_section_published": "2025-12-23T09:36:45.900836612Z", + "all_results_received": "2025-12-23T09:36:45.974802395Z", + "output_generated": "2025-12-23T09:36:45.974979502Z", + "total_processing_time_ms": 74, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 73, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + 
"creation_time": "2025-12-23T09:36:45.900398594Z", + "publish_time": "2025-12-23T09:36:45.900710306Z", + "first_worker_start": "2025-12-23T09:36:45.901225727Z", + "last_worker_end": "2025-12-23T09:36:45.973885Z", + "total_journey_time_ms": 73, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:45.901412135Z", + "start_time": "2025-12-23T09:36:45.901471437Z", + "end_time": "2025-12-23T09:36:45.90155204Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:45.90161Z", + "start_time": "2025-12-23T09:36:45.901774Z", + "end_time": "2025-12-23T09:36:45.973885Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 72 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:45.901219127Z", + "start_time": "2025-12-23T09:36:45.90129523Z", + "end_time": "2025-12-23T09:36:45.901383834Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:45.901151124Z", + "start_time": "2025-12-23T09:36:45.901225727Z", + "end_time": "2025-12-23T09:36:45.901264729Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:45.900751708Z", + "publish_time": "2025-12-23T09:36:45.900836612Z", + "first_worker_start": "2025-12-23T09:36:45.901470437Z", + "last_worker_end": "2025-12-23T09:36:45.944852Z", + "total_journey_time_ms": 44, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:45.901435836Z", + "start_time": "2025-12-23T09:36:45.901470437Z", + "end_time": "2025-12-23T09:36:45.901477337Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:45.901775Z", + "start_time": "2025-12-23T09:36:45.901909Z", + "end_time": 
"2025-12-23T09:36:45.944852Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 42 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:45.901517839Z", + "start_time": "2025-12-23T09:36:45.90154494Z", + "end_time": "2025-12-23T09:36:45.90155114Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:45.901568841Z", + "start_time": "2025-12-23T09:36:45.901605443Z", + "end_time": "2025-12-23T09:36:45.901608043Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 2, + "total_processing_ms": 114, + "min_processing_ms": 42, + "max_processing_ms": 72, + "avg_processing_ms": 57, + "median_processing_ms": 72, + "total_queue_wait_ms": 2, + "avg_queue_wait_ms": 1 + }, + "topn": { + "worker_type": "topn", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 2, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 2, + "average_section_size": 2560, + "slowest_section_id": 0, + "slowest_section_time_ms": 73 + } +} diff --git a/data/output/007efcae4647051818f8aea0a4580af34cdc6a70.json b/data/output/007efcae4647051818f8aea0a4580af34cdc6a70.json new file mode 100644 index 0000000..22e05e0 --- /dev/null +++ 
b/data/output/007efcae4647051818f8aea0a4580af34cdc6a70.json @@ -0,0 +1,626 @@ +{ + "file_name": "007efcae4647051818f8aea0a4580af34cdc6a70.txt", + "total_words": 1800, + "top_n_words": [ + { + "word": "the", + "count": 107 + }, + { + "word": "to", + "count": 53 + }, + { + "word": "of", + "count": 48 + }, + { + "word": "in", + "count": 45 + }, + { + "word": "a", + "count": 39 + }, + { + "word": "party", + "count": 28 + }, + { + "word": "that", + "count": 26 + }, + { + "word": "and", + "count": 25 + }, + { + "word": "he", + "count": 21 + }, + { + "word": "for", + "count": 19 + } + ], + "sorted_sentences": [ + { + "text": "The .", + "length": 5 + }, + { + "text": "'All .", + "length": 6 + }, + { + "text": "'Time .", + "length": 7 + }, + { + "text": "people.", + "length": 7 + }, + { + "text": "policy.", + "length": 7 + }, + { + "text": "'Unlike .", + "length": 9 + }, + { + "text": "4 billion.", + "length": 10 + }, + { + "text": "'Pensioners, .", + "length": 14 + }, + { + "text": "in their own interest.", + "length": 22 + }, + { + "text": "compliance department.", + "length": 22 + }, + { + "text": "from wealthy individuals.", + "length": 25 + }, + { + "text": "'Nothing has so far happened.", + "length": 29 + }, + { + "text": "requirements of electoral law.", + "length": 30 + }, + { + "text": "The following year, after a £1.", + "length": 32 + }, + { + "text": "'David Cameron should come clean.", + "length": 33 + }, + { + "text": "These are strictly enforced by our .", + "length": 36 + }, + { + "text": "The journalists secured the two-hour .", + "length": 38 + }, + { + "text": "Party staffer now working as a lobbyist.", + "length": 40 + }, + { + "text": "'What happened is completely unacceptable.", + "length": 42 + }, + { + "text": "Now it appears obvious why,' said Labour MP .", + "length": 45 + }, + { + "text": "'It’s quite right that Peter Cruddas has resigned.", + "length": 52 + }, + { + "text": "'The promise also featured in the Coalition agreement.", + "length": 54 
+ }, + { + "text": "Michael Dugher who was speaking before Mr Cruddas quit.", + "length": 55 + }, + { + "text": "They should not be allowed to duck the issue any longer.", + "length": 56 + }, + { + "text": "donations to the Conservative Party have to comply with the .", + "length": 61 + }, + { + "text": "donation' if they wanted access to senior government figures.", + "length": 61 + }, + { + "text": "Sunday Times said she told the reporters they should make a 'huge .", + "length": 67 + }, + { + "text": "donations to the Conservative Party do not buy party or government .", + "length": 68 + }, + { + "text": "and again the Tory party has been the obstacle to capping donations .", + "length": 69 + }, + { + "text": "government is out of touch with the overwhelming majority of British .", + "length": 70 + }, + { + "text": "meeting with Mr Cruddas through Sarah Southern, a former Conservative .", + "length": 71 + }, + { + "text": "the young unemployed and squeezed middle families cannot afford to buy .", + "length": 72 + }, + { + "text": "this sort of access or influence which is just another reason why this .", + "length": 72 + }, + { + "text": "the Labour Party, where union donations are traded for party policies, .", + "length": 72 + }, + { + "text": "Attempts at a cross-party consensus for reform have repeatedly foundered.", + "length": 73 + }, + { + "text": "He said there was no point in 'scratching around' with donations of £10,000.", + "length": 77 + }, + { + "text": "Similarly, I have never knowingly even met anyone from the Number 10 policy unit.", + "length": 81 + }, + { + "text": "' Labour challenged the Prime Minister to 'come clean' about what he knew and when.", + "length": 83 + }, + { + "text": "Friends describe Mr Cruddas as a classic ‘Cockney geezer’ who enjoys his wealth.", + "length": 84 + }, + { + "text": "I will make sure there is a proper party inquiry to make sure this can’t happen again.", + "length": 88 + }, + { + "text": "Peter Cruddas, 
Britain’s 15th richest man, has enjoyed a spectacular rags-to-riches story.", + "length": 92 + }, + { + "text": "Under fire: Labour challenged the Prime Minister to 'come clean' about what he knew and when .", + "length": 94 + }, + { + "text": "He showed off a £200,000 platinum watch in one recent interview and boasted that he had 15 more.", + "length": 97 + }, + { + "text": "In a meeting secretly recorded by The Sunday Times, he said: 'It will be awesome for your business.", + "length": 99 + }, + { + "text": "Sleaze watchdog Sir Christopher Kelly warned today that the parties could 'duck the issue no longer'.", + "length": 101 + }, + { + "text": "He said he had not consulted any politicians or senior party officials before the recorded conversation.", + "length": 104 + }, + { + "text": "'But in order to make that clear beyond doubt, I have regrettably decided to resign with immediate effect.", + "length": 106 + }, + { + "text": "'Clearly there is no question of donors being able to influence policy or gain undue access to politicians.", + "length": 107 + }, + { + "text": "2007 that membership of the House of Lords should be decided by an independent body in a fully transparent process.", + "length": 115 + }, + { + "text": "'All three of the major parties promised to end the ‘big donor culture’ in their manifestos for the last election.", + "length": 118 + }, + { + "text": "'Will the PM say exactly what he knew and when about an apparent effort to sell access and influence in Downing Street?", + "length": 119 + }, + { + "text": "In 2007 the investment bank Goldman Sachs bought a 10 per cent stake in CMC, which valued Mr Cruddas’s business at £1.", + "length": 121 + }, + { + "text": "This is not the way that we raise money in the Conservative Party, it shouldn’t have happened,' Mr Cameron told BBC News.", + "length": 123 + }, + { + "text": "' A Conservative Party spokesman said: 'No donation was ever accepted or even formally considered by the Conservative Party.", 
+ "length": 124 + }, + { + "text": "The son of a meat porter in London’s East End, he left school at 15 and went on to become one of the richest men in the City.", + "length": 127 + }, + { + "text": "' The 'cash-for-access' row involving Conservative Party co-treasurer Peter Cruddas evokes memories of political scandals past.", + "length": 127 + }, + { + "text": "'The parties collectively need urgently to address the damage this does to confidence in the integrity of the political process.", + "length": 128 + }, + { + "text": "It had been reported that millionaires who gave large donations to the Labour and Conservative parties were to be given peerages.", + "length": 129 + }, + { + "text": "He is also known to have been a member of the prestigious Monte Carlo Golf Club, which plays host to some of Europe's top players.", + "length": 130 + }, + { + "text": "’ In one shopping spree he bought 17 pairs of shoes at the same store so he could have identical pairs in each of his five homes.", + "length": 131 + }, + { + "text": "I imagine this is presumably because it requires all the parties to face up to some difficult issues which they would rather avoid.", + "length": 131 + }, + { + "text": "During the inquiry, Mr Blair became the first prime minister to be interviewed by police as part of a political corruption inquiry.", + "length": 131 + }, + { + "text": "They are said to have discussed the creation of a British subsidiary and the possibility of using UK employees to make the donation.", + "length": 132 + }, + { + "text": "In the middle of the decade was the 'cash-for-honours' affair, which raised questions about the activities of the two biggest parties.", + "length": 134 + }, + { + "text": "' Mr Cameron today responded to the damaging revelations by branding Mr Cruddas's 'cash for access' claims as 'completely unacceptable'.", + "length": 136 + }, + { + "text": "He has given about £350,000 to the Conservative Party in the last two years and also helped bankroll 
the recent ‘No to AV’ campaign.", + "length": 137 + }, + { + "text": "' Mr Cruddas said he only took up the post at the beginning of the month and was 'keen to meet anyone potentially interested in donating'.", + "length": 138 + }, + { + "text": "' The claims are particularly damaging, however, given Mr Cameron's strong stance on 'secret corporate lobbying' which he declared in 2010.", + "length": 139 + }, + { + "text": "'Specifically, it was categorically not the case that I could offer, or that David Cameron would consider, any access as a result of a donation.", + "length": 144 + }, + { + "text": "The Prime Minister insisted that was 'not the way' the Conservative Party raised money and promised an inquiry to ensure it would not happen again.", + "length": 147 + }, + { + "text": "' Sir Christopher, chairman of the independent Committee on Standards in Public Life, said: 'It would be wrong to regard this as an isolated event.", + "length": 147 + }, + { + "text": "' The Conservative Party said Cruddas would be replaced in his post by Stanley Fink, a member of the House of Lords who had previously held the role.", + "length": 149 + }, + { + "text": "Announcing his resignation, Mr Cruddas said in a statement: 'I deeply regret any impression of impropriety arising from my bluster in that conversation.", + "length": 152 + }, + { + "text": "' Quit: Tory co-treasurer Peter Cruddas has resigned after being filmed apparently offering access to the PM in return for donations of £250,000 a year .", + "length": 154 + }, + { + "text": "4 million, 19-month investigation, the Crown Prosecution Service announced that nobody would be charged, as there was no realistic prospect of a conviction.", + "length": 156 + }, + { + "text": "Alleged links between the granting of honours and financial support for parties dogged the last two years of Labour Prime Minister Tony Blair's time at Number 10.", + "length": 162 + }, + { + "text": "After starting out as an office junior he became a 
City trader before setting up the spread betting firm CMC Markets, which focuses on bets on the financial markets.", + "length": 165 + }, + { + "text": "He told one interviewer: ‘I’ve got a £10 million apartment in Monaco, a £5 million house in England, another fantastic house in Antibes, a yacht and a private jet.", + "length": 169 + }, + { + "text": "The then chairman of the Public Administration Committee, Dr Tony Wright, said that though no charges were brought over the scandal, trust in public life had been damaged.", + "length": 171 + }, + { + "text": "He said it was right that treasurer Peter Cruddas had resigned overnight in the wake of the revelations about his comments to undercover reporters posing as potential donors.", + "length": 174 + }, + { + "text": "Conservative Party co-treasurer Peter Cruddas has resigned after being filmed apparently offering access to Prime Minister David Cameron in return for donations of £250,000 a year.", + "length": 181 + }, + { + "text": "The senior Tory fundraiser told undercover reporters pretending to be business representatives that 'things will open up for you' if they donated that amount of money to the Tories.", + "length": 181 + }, + { + "text": "Geoff Hoon, Stephen Byers and Patricia Hewitt were filmed by Channel 4's Dispatches programme discussing the possibility of working for what they thought was an American lobby firm.", + "length": 181 + }, + { + "text": "Mr Cruddas told the undercover reporters that 'premier league' donors - those giving £250,000 a year - could lobby Mr Cameron directly and their views were 'fed in' to Downing Street.", + "length": 184 + }, + { + "text": "Access: Mr Cruddas said financiers who gave 'premier league' donations could get access to Mr Cameron, possibly at venues such as the Prime Minister's Buckinghamshire retreat, Chequers .", + "length": 186 + }, + { + "text": "In 2010, Stephen Byers, Patricia Hewitt and Geoff Hoon were suspended from the Parliamentary Labour Party over 
allegations they were prepared to take cash to influence government policy .", + "length": 187 + }, + { + "text": "In February 2010, while leader of the opposition, he said: 'I believe that secret corporate lobbying, like the expenses scandal, goes to the heart of why people are so fed up with politics.", + "length": 189 + }, + { + "text": "A police inquiry had followed a complaint by Scottish National Party MP Angus MacNeil in 2006 that financial support was being rewarded with honours in contravention of a 1925 anti-corruption law.", + "length": 196 + }, + { + "text": "' In November last year the topic of party funding became national news when a key committee warned that large financial contributors were being given 'preferential access to political decision makers'.", + "length": 202 + }, + { + "text": "According to The Sunday Times, he believed that any prospective donations from the reporters - pretending to be wealth fund executives - would come from Liechtenstein and would be ineligible under election law.", + "length": 210 + }, + { + "text": "Events like it are inevitable as long as the main political parties are dependent for their existence on large donations from rich individuals or, in the case of the Labour Party, a small number of trade unions.", + "length": 211 + }, + { + "text": "Mr Cruddas also told the Sunday Times that Mr Cameron hosted major donors at receptions at his official Downing Street residence and at Highclere Castle, the location used in British television drama 'Downton Abbey.", + "length": 215 + }, + { + "text": "My Committee last autumn provided a blueprint for doing so which we believed to be both reasonably fair and sustainable and which, because we are an independent committee, was not distorted by thoughts of party political advantage.", + "length": 231 + }, + { + "text": "Two years ago, with Labour still in power, there was a row over so-called 'cash-for-influence' when three former Cabinet ministers were suspended from the 
Parliamentary Labour Party over allegations that they were prepared to take cash to influence government policy.", + "length": 267 + }, + { + "text": "The committee on standards in public life recommended that party donations be capped at a maximum if £10,000 each, while Conservative co-chairman Andrew Feldman told the committee: 'There is no question of individuals either influencing policy or gaining an unfair advantage by virtue of their financial contributions to the party.", + "length": 332 + }, + { + "text": "Liberal Democrat Chief Secretary to the Treasury Danny Alexander said Mr Cruddas’s comments were 'utterly disgraceful' and made the case again for reform of party funding - an issue that came under scrutiny when Tony Blair’s government was embroiled in allegations that honours were awarded in return for cash for the Labour Party.", + "length": 335 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5783746341864268 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:46.401539007Z", + "first_section_created": "2025-12-23T09:36:46.401965624Z", + "last_section_published": "2025-12-23T09:36:46.402414742Z", + "all_results_received": "2025-12-23T09:36:46.54987929Z", + "output_generated": "2025-12-23T09:36:46.550182102Z", + "total_processing_time_ms": 148, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 147, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:46.401965624Z", + "publish_time": "2025-12-23T09:36:46.402154832Z", + "first_worker_start": "2025-12-23T09:36:46.402720755Z", + "last_worker_end": "2025-12-23T09:36:46.489769Z", + "total_journey_time_ms": 87, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:46.402872961Z", + "start_time": "2025-12-23T09:36:46.402939063Z", + "end_time": "2025-12-23T09:36:46.403045268Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:46.402936Z", + "start_time": "2025-12-23T09:36:46.403079Z", + "end_time": "2025-12-23T09:36:46.489769Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 86 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:46.403071369Z", + "start_time": "2025-12-23T09:36:46.403140472Z", + "end_time": "2025-12-23T09:36:46.403263677Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:46.402624151Z", + "start_time": "2025-12-23T09:36:46.402720755Z", + "end_time": "2025-12-23T09:36:46.402798158Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 1, + "creation_time": "2025-12-23T09:36:46.402198234Z", + "publish_time": "2025-12-23T09:36:46.402324339Z", + "first_worker_start": "2025-12-23T09:36:46.40286376Z", + "last_worker_end": "2025-12-23T09:36:46.501173Z", + "total_journey_time_ms": 98, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:46.403010766Z", + "start_time": "2025-12-23T09:36:46.403065569Z", + "end_time": "2025-12-23T09:36:46.403154972Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:46.403149Z", + "start_time": "2025-12-23T09:36:46.403314Z", + "end_time": "2025-12-23T09:36:46.501173Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 97 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:46.402969165Z", + "start_time": "2025-12-23T09:36:46.403031467Z", + "end_time": "2025-12-23T09:36:46.403123571Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:46.402808858Z", + "start_time": "2025-12-23T09:36:46.40286376Z", + "end_time": 
"2025-12-23T09:36:46.402919463Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + }, + { + "section_id": 2, + "creation_time": "2025-12-23T09:36:46.402371141Z", + "publish_time": "2025-12-23T09:36:46.402414742Z", + "first_worker_start": "2025-12-23T09:36:46.403191474Z", + "last_worker_end": "2025-12-23T09:36:46.54902Z", + "total_journey_time_ms": 146, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:46.403174773Z", + "start_time": "2025-12-23T09:36:46.403191474Z", + "end_time": "2025-12-23T09:36:46.403212575Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:46.491039Z", + "start_time": "2025-12-23T09:36:46.491102Z", + "end_time": "2025-12-23T09:36:46.54902Z", + "queue_wait_time_ms": 88, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:46.403564389Z", + "start_time": "2025-12-23T09:36:46.40358829Z", + "end_time": "2025-12-23T09:36:46.403618291Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:46.403169273Z", + "start_time": "2025-12-23T09:36:46.403207974Z", + "end_time": "2025-12-23T09:36:46.403218075Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 3, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 3, + "total_processing_ms": 240, + "min_processing_ms": 57, + "max_processing_ms": 97, + "avg_processing_ms": 80, + "median_processing_ms": 86, + "total_queue_wait_ms": 88, + "avg_queue_wait_ms": 29 + }, + "topn": { + "worker_type": 
"topn", + "total_sections": 3, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 3, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 3, + "average_section_size": 3643, + "slowest_section_id": 2, + "slowest_section_time_ms": 146 + } +} diff --git a/data/output/007f4a3310324f2c3e7a201ae313733f2f1234ce.json b/data/output/007f4a3310324f2c3e7a201ae313733f2f1234ce.json new file mode 100644 index 0000000..fadb610 --- /dev/null +++ b/data/output/007f4a3310324f2c3e7a201ae313733f2f1234ce.json @@ -0,0 +1,262 @@ +{ + "file_name": "007f4a3310324f2c3e7a201ae313733f2f1234ce.txt", + "total_words": 356, + "top_n_words": [ + { + "word": "the", + "count": 15 + }, + { + "word": "of", + "count": 13 + }, + { + "word": "and", + "count": 8 + }, + { + "word": "charges", + "count": 8 + }, + { + "word": "teacher", + "count": 8 + }, + { + "word": "to", + "count": 8 + }, + { + "word": "jensen", + "count": 7 + }, + { + "word": "a", + "count": 6 + }, + { + "word": "sexual", + "count": 6 + }, + { + "word": "she", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "L.", + "length": 2 + }, + { + "text": "L.", + "length": 2 + }, + { + "text": "L.", + "length": 2 + }, + { + "text": "Lyles said J.", + "length": 13 + }, + { + "text": "Both charges are felonies.", + "length": 26 + }, + { + "text": "Greenville County Schools spokesman Oby .", + "length": 41 + }, + { + "text": "Mann Principal Charles Mayfield was informed on Friday, .", + "length": 57 + }, + { + "text": "Mann High School overheard students talking about the teacher .", + "length": 63 + }, + { + "text": "Jensen's misdemeanors were discovered after another 
teacher at J.", + "length": 65 + }, + { + "text": "teacher reported an alleged conversation overheard between students.", + "length": 68 + }, + { + "text": "November 9 of allegations of inappropriate texts to students after a .", + "length": 70 + }, + { + "text": "The 'sexual explicit' material was discovered after another teacher at J.", + "length": 73 + }, + { + "text": "Video: New charges for former JL Mann Teacher, charged with sexual battery .", + "length": 76 + }, + { + "text": "' Jensen is being held at the Greenville County Detention Center on $13,000 bond.", + "length": 81 + }, + { + "text": "'The principal met with the teacher that day and she resigned while the investigation continued.", + "length": 96 + }, + { + "text": "Jensen, 40, resigned immediately after she was confronted with the evidence by school chiefs a week ago.", + "length": 104 + }, + { + "text": "Mann High School in Greenville County, South Carolina, overheard other students talking about the teacher.", + "length": 106 + }, + { + "text": "She also faces one count of sexual battery and could be jailed for up to 15 years if convicted of all charges.", + "length": 110 + }, + { + "text": "She faced obscenity charges but within 24 hours police said she facing further more serious charges of sexual battery.", + "length": 118 + }, + { + "text": "Michelle Stabach Jensen was initially arrested on obscenity charges for sending the photos and texts to one of her students.", + "length": 124 + }, + { + "text": "The charges will be communicated to the State Department of Education, which determines revocation of teaching certificates.", + "length": 124 + }, + { + "text": "According to an arrest warrant a 17-year-old student said Jensen touched his genitals while she allowed him to fondler her breasts.", + "length": 131 + }, + { + "text": "Jensen was charged with two counts each of unlawful dissemination of obscene materials to persons under 18 years of age, and delinquency of a minor.", + "length": 148 
+ }, + { + "text": "A high school science teacher is facing multiple sex charges after being accused of fondling a student and sending another sexually explicit texts and photos.", + "length": 158 + }, + { + "text": "'The administration immediately began its investigation and contacted the school’s Resource Office with the Greenville City Police,' Lyles said via email Thursday.", + "length": 165 + }, + { + "text": "Michelle Stabach Jensen, 40, faces up to 15 years in jail for 'sexual battery' When police seized the 17-year-old boy's cell phone they found lewd text messages and photographs that were sexual in nature.", + "length": 204 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.5863316655158997 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:46.903366448Z", + "first_section_created": "2025-12-23T09:36:46.903765864Z", + "last_section_published": "2025-12-23T09:36:46.903986573Z", + "all_results_received": "2025-12-23T09:36:46.961470291Z", + "output_generated": "2025-12-23T09:36:46.961622698Z", + "total_processing_time_ms": 58, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 57, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:46.903765864Z", + "publish_time": "2025-12-23T09:36:46.903986573Z", + "first_worker_start": "2025-12-23T09:36:46.904559896Z", + "last_worker_end": "2025-12-23T09:36:46.960585Z", + "total_journey_time_ms": 56, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:46.904619698Z", + "start_time": "2025-12-23T09:36:46.904690201Z", + "end_time": "2025-12-23T09:36:46.904742903Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:46.904733Z", + "start_time": "2025-12-23T09:36:46.904882Z", + "end_time": "2025-12-23T09:36:46.960585Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 55 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:46.904518094Z", + "start_time": "2025-12-23T09:36:46.904572596Z", + "end_time": "2025-12-23T09:36:46.9046559Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:46.904520194Z", + "start_time": "2025-12-23T09:36:46.904559896Z", + "end_time": "2025-12-23T09:36:46.904589097Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 55, + "min_processing_ms": 55, + "max_processing_ms": 55, + "avg_processing_ms": 55, + "median_processing_ms": 55, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2248, + "slowest_section_id": 0, + "slowest_section_time_ms": 56 + } +} diff --git a/data/output/007f50f955e378309e0d16b8d4d9035f83c480da.json b/data/output/007f50f955e378309e0d16b8d4d9035f83c480da.json new file mode 100644 index 0000000..fefd5a4 --- /dev/null +++ b/data/output/007f50f955e378309e0d16b8d4d9035f83c480da.json @@ -0,0 
+1,242 @@ +{ + "file_name": "007f50f955e378309e0d16b8d4d9035f83c480da.txt", + "total_words": 599, + "top_n_words": [ + { + "word": "the", + "count": 27 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "and", + "count": 13 + }, + { + "word": "on", + "count": 13 + }, + { + "word": "s", + "count": 12 + }, + { + "word": "we", + "count": 12 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "of", + "count": 10 + }, + { + "word": "her", + "count": 9 + }, + { + "word": "been", + "count": 8 + } + ], + "sorted_sentences": [ + { + "text": "By .", + "length": 4 + }, + { + "text": "Lucy Crossley .", + "length": 15 + }, + { + "text": "'We will see what we do about it next year.", + "length": 43 + }, + { + "text": "Tuesday night's final was watched by more than nine million viewers.", + "length": 68 + }, + { + "text": "' A BBC spokesman said the broadcaster would not be commenting on the claims.", + "length": 77 + }, + { + "text": "Her rival Kimberley Wilson, 30, had been favourite to be named champion, having been initially given odds of 11-8.", + "length": 114 + }, + { + "text": "However, the odds for the 31-year-old quickly rose to 11-10, making her favourite, after punters rushed to back her.", + "length": 116 + }, + { + "text": "Rising odds: Frances was initially the 5-2 outsider, but her odds quicky rose to 11-10 making her the firm favourite .", + "length": 118 + }, + { + "text": "He added: 'We have lost a few thousand but we take that on the chin, it's been good fun and we have maybe learned our lesson.", + "length": 125 + }, + { + "text": "Fearing insiders had been betting on the pre-recorded BBC Two show, Coral were forced to suspend the book after 90 per cent of wagers favoured Miss Quinn.", + "length": 154 + }, + { + "text": "Bookies Coral were left badly burned by the bets on eventual winner Miss Quinn, who had been backed at just 5-2 when betting opened after last week's semi final.", + "length": 161 + }, + { + "text": "Final three: Frances, pictured 
second from right, with her fellow Bake Off finalists and judges, from left, Ruby Tandoh, Mary Berry, Kimberley Wilson and Paul Hollywood .", + "length": 170 + }, + { + "text": "'However we have no complaints as there was always a risk attached to betting on a non-live show, and for those who backed Frances, her victory really was the icing on the cake.", + "length": 177 + }, + { + "text": "'Kimberley Wilson, the original 11-8 favourite, and Paul Hollywood’s favourite, Ruby Tandoh, 6-4 second favourite, hardly attracted a bet between them, and their odds drifted to 2-1 and 3-1 respectively.", + "length": 205 + }, + { + "text": "'Most of the bets were placed online, many from new customers, and we face a four figure payout now Frances has been crowned champion, so it’s fair to say we have had our fingers burned by this Bake Off.", + "length": 205 + }, + { + "text": "A bookmaker's have accused television insiders and friends in the know of betting on the outcome of the eagerly anticipated Great British Bake Off after a flurry of bets were placed on 'outsider' Frances Quinn.", + "length": 210 + }, + { + "text": "The children's clothing designer had been seen as an outsider to take the top prize, as judges Mary Berry and Paul Hollywood regularly criticised her spectacular creations as a victory for 'style over substance'.", + "length": 212 + }, + { + "text": "Pay out: Bookmaker's coral were forced to close its Great British Bake Off book after a flurry of bets were placed on 'outsider' Frances Quinn, prompting fears of television insiders placing wagers on the outcome .", + "length": 214 + }, + { + "text": "' Mr Stevens said that Coral had not contacted police about the insider fears, but said that the bookmaker was unsure if bets would be taken on next year's show - which is due to move to BBC One due to its popularity.", + "length": 217 + }, + { + "text": "However, Miss Quinn saw off Ms Wilson and 21-year-old Ruby Tandoh, who had been backed at 6-4, thanks to her 
rainbow-style savoury picnic pie and show stopper three-tier wedding cake, inspired by the Shakespearean play A Midsummer Night's Dream.", + "length": 245 + }, + { + "text": "'We were the only bookmaker to actually take bets on Great British Bake Off after last week’s semi-final, and we did so knowing that as the show was pre-recorded someone somewhere would know the outcome, so we always kept stakes quite low, but that didn’t stop plenty of people getting their bets on before we took the decision to suspend betting because more than 90 per cent of bets taken were for Frances,' said Coral’s David Stevens.", + "length": 443 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.4881036579608917 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:47.404776272Z", + "first_section_created": "2025-12-23T09:36:47.405168388Z", + "last_section_published": "2025-12-23T09:36:47.405346595Z", + "all_results_received": "2025-12-23T09:36:47.470397919Z", + "output_generated": "2025-12-23T09:36:47.470559625Z", + "total_processing_time_ms": 65, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 65, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:47.405168388Z", + "publish_time": "2025-12-23T09:36:47.405346595Z", + "first_worker_start": "2025-12-23T09:36:47.405826714Z", + "last_worker_end": "2025-12-23T09:36:47.469479Z", + "total_journey_time_ms": 64, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:47.405842215Z", + "start_time": "2025-12-23T09:36:47.405895617Z", + "end_time": "2025-12-23T09:36:47.40597662Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:47.40598Z", + "start_time": "2025-12-23T09:36:47.406122Z", + "end_time": "2025-12-23T09:36:47.469479Z", + "queue_wait_time_ms": 0, + 
"processing_time_ms": 63 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:47.405773412Z", + "start_time": "2025-12-23T09:36:47.405849115Z", + "end_time": "2025-12-23T09:36:47.405931218Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:47.405763712Z", + "start_time": "2025-12-23T09:36:47.405826714Z", + "end_time": "2025-12-23T09:36:47.405868816Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 63, + "min_processing_ms": 63, + "max_processing_ms": 63, + "avg_processing_ms": 63, + "median_processing_ms": 63, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3311, + "slowest_section_id": 0, + "slowest_section_time_ms": 64 + } +} diff --git a/data/output/007fa7f25c695f864d73882f069cd2845511d72e.json b/data/output/007fa7f25c695f864d73882f069cd2845511d72e.json new file mode 100644 index 0000000..b7009fb --- /dev/null +++ b/data/output/007fa7f25c695f864d73882f069cd2845511d72e.json @@ 
-0,0 +1,218 @@ +{ + "file_name": "007fa7f25c695f864d73882f069cd2845511d72e.txt", + "total_words": 300, + "top_n_words": [ + { + "word": "the", + "count": 24 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "and", + "count": 12 + }, + { + "word": "a", + "count": 10 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "on", + "count": 6 + }, + { + "word": "s", + "count": 6 + }, + { + "word": "it", + "count": 5 + }, + { + "word": "mayer", + "count": 5 + }, + { + "word": "oscar", + "count": 5 + } + ], + "sorted_sentences": [ + { + "text": "Scroll down for video .", + "length": 23 + }, + { + "text": "However, there were no injuries.", + "length": 32 + }, + { + "text": "An iconic Oscar Mayer Wienermobile has crashed into a pole in central Pennsylvania.", + "length": 83 + }, + { + "text": "Oscar Mayer is a food company and division of Kraft Foods, based in Madison, Wisconsin.", + "length": 87 + }, + { + "text": "It's much loved 1973 commercial featured a cute kid fishing and singing it's theme song.", + "length": 88 + }, + { + "text": "Crash: The iconic Oscar Mayer Wienermobile was smashed up and damaged in an accident in Harrisburg .", + "length": 100 + }, + { + "text": "They measure 27 feet in length and first hit the road in 1936 after Oscar Mayer's nephew Carl created them.", + "length": 107 + }, + { + "text": "But it's now thought that the start of that song can be changed to 'My bologna has a new first name and it's C-R-A-S-H.", + "length": 120 + }, + { + "text": "Police said the crash occurred in the 500 block of South Enola Road in Enola, when the vehicle skidded and slammed into a pole.", + "length": 127 + }, + { + "text": "Damage: Part of the wiener mobile hangs off on an icy street in the 500 block of South Enola Road in Enola, where it came to a halt .", + "length": 133 + }, + { + "text": "The giant hot dog on top of the vehicle was smashed up and damaged in the accident in Harrisburg, on Sunday, according to CBS Pittsburgh.", + "length": 139 + }, 
+ { + "text": "The vehicle reportedly slipped off the roadway near the intersection of State Road and Fairview Avenue, smashing the windshield ABC 27 reported.", + "length": 145 + }, + { + "text": "But drivers who passed by the accident, stopped to take pictures of the damaged wienermobile on their cell phones and have shared the images on social media.", + "length": 157 + }, + { + "text": "' The company has several 'Wienermobiles,' all shaped like a hot dog on a bun, which are used to promote and advertise Oscar Mayer products in the United States.", + "length": 161 + }, + { + "text": "Windscreen: The vehicle reportedly slipped off the roadway near the intersection of State Road and Fairview Avenue, slamming into a pole and smashing the windshield .", + "length": 166 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.74298495054245 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:47.906109893Z", + "first_section_created": "2025-12-23T09:36:47.906475707Z", + "last_section_published": "2025-12-23T09:36:47.906663715Z", + "all_results_received": "2025-12-23T09:36:47.969169736Z", + "output_generated": "2025-12-23T09:36:47.969295641Z", + "total_processing_time_ms": 63, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 62, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:47.906475707Z", + "publish_time": "2025-12-23T09:36:47.906663715Z", + "first_worker_start": "2025-12-23T09:36:47.907189336Z", + "last_worker_end": "2025-12-23T09:36:47.968255Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:47.907207237Z", + "start_time": "2025-12-23T09:36:47.90727434Z", + "end_time": "2025-12-23T09:36:47.907314641Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": 
"2025-12-23T09:36:47.907391Z", + "start_time": "2025-12-23T09:36:47.907522Z", + "end_time": "2025-12-23T09:36:47.968255Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:47.907162735Z", + "start_time": "2025-12-23T09:36:47.907210137Z", + "end_time": "2025-12-23T09:36:47.907253139Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:47.907127634Z", + "start_time": "2025-12-23T09:36:47.907189336Z", + "end_time": "2025-12-23T09:36:47.907210937Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1681, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/00801ed3093a0fbe8db364ad395167c6255d8c01.json 
b/data/output/00801ed3093a0fbe8db364ad395167c6255d8c01.json new file mode 100644 index 0000000..ed2c83b --- /dev/null +++ b/data/output/00801ed3093a0fbe8db364ad395167c6255d8c01.json @@ -0,0 +1,250 @@ +{ + "file_name": "00801ed3093a0fbe8db364ad395167c6255d8c01.txt", + "total_words": 445, + "top_n_words": [ + { + "word": "the", + "count": 28 + }, + { + "word": "and", + "count": 15 + }, + { + "word": "to", + "count": 15 + }, + { + "word": "a", + "count": 11 + }, + { + "word": "in", + "count": 11 + }, + { + "word": "of", + "count": 8 + }, + { + "word": "said", + "count": 8 + }, + { + "word": "was", + "count": 7 + }, + { + "word": "at", + "count": 6 + }, + { + "word": "it", + "count": 6 + } + ], + "sorted_sentences": [ + { + "text": "One said: 'What's the problem?", + "length": 30 + }, + { + "text": "A true dominatrix would have demanded it.", + "length": 41 + }, + { + "text": "' Another said: 'I see she asked for permission.", + "length": 48 + }, + { + "text": "It's all legit and she pays her taxes just like anyone else.", + "length": 60 + }, + { + "text": "' While many of the online responses have been positive or tongue in cheek.", + "length": 75 + }, + { + "text": "Nearby resident Jason Morris, 44, said: 'I personally don't care as long as everyone is consenting.", + "length": 99 + }, + { + "text": "Stockport Council said said the plans would improve economic, social and environmental conditions .", + "length": 100 + }, + { + "text": "She was using the converted warehouse at the Vauxhall Industrial Estate in Reddish without permission .", + "length": 103 + }, + { + "text": "White has previously been fined for breaching health and safety laws after firefighters were unable to get in .", + "length": 111 + }, + { + "text": "At the time she said: 'It involved a lot of humiliation, doing domestic work and dressing up in women’s clothes.", + "length": 114 + }, + { + "text": "Firemen finally gained access and found a trove of handcuffs, chains and other restraining devices 
in the basement.", + "length": 115 + }, + { + "text": "A council spokesman said: 'The proposal would improve the economic, social and environmental conditions of the area.", + "length": 116 + }, + { + "text": "Planning officers have now approved the application, which asked for opening hours of 11am to 7pm, Tuesday to Saturday.", + "length": 119 + }, + { + "text": "' Planning officers have now approved the application, which asked for opening hours of 11am to 7pm, Tuesday to Saturday .", + "length": 122 + }, + { + "text": "They also discovered several canisters of nitrous oxide and laughing gas - also know as 'hippie crack' - which White’s clients used.", + "length": 134 + }, + { + "text": "' Despite that the reaction to the club has been largely positive and the council received five responses to its consultation - all in favour.", + "length": 142 + }, + { + "text": "' The fetish club was discovered in 2012 when firemen were unable to get into the building after it caught fire because of all the locked doors.", + "length": 144 + }, + { + "text": "At the hearing at Stockport Magistrates Court Miss White said men were generally restrained or gagged in the dungeon, but nothing was too severe.", + "length": 145 + }, + { + "text": "Miss White, who refers to herself as a 'Manchester mistress' and 'sensual sadist, was fined £8,000 for breaching health and safety laws at the time.", + "length": 150 + }, + { + "text": "'It therefore comprises sustainable development and the local planning authority [Stockport Council] worked proactively and positively to issue the decision without delay.", + "length": 171 + }, + { + "text": "She was already using the premises without planning permission but was forced to submit a retrospective application in September after a complaint was made to the local authority.", + "length": 179 + }, + { + "text": "Stockport Council has given Lorraine White the go-ahead to run her 'photography and mild fetish play' business in a converted warehouse 
at the Vauxhall Industrial Estate in Reddish.", + "length": 181 + }, + { + "text": "Lorraine White has been given permission to run her 'photography and mild fetish play business' A dominatrix who calls herself Princess Lucina has won planning permission to turn a warehouse into a sex fetish dungeon - because it'll help the local economy.", + "length": 256 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4367789924144745 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:48.407449114Z", + "first_section_created": "2025-12-23T09:36:48.409159383Z", + "last_section_published": "2025-12-23T09:36:48.40933959Z", + "all_results_received": "2025-12-23T09:36:48.469194204Z", + "output_generated": "2025-12-23T09:36:48.469352711Z", + "total_processing_time_ms": 61, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 59, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:48.409159383Z", + "publish_time": "2025-12-23T09:36:48.40933959Z", + "first_worker_start": "2025-12-23T09:36:48.40982391Z", + "last_worker_end": "2025-12-23T09:36:48.468216Z", + "total_journey_time_ms": 59, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:48.409883812Z", + "start_time": "2025-12-23T09:36:48.409946515Z", + "end_time": "2025-12-23T09:36:48.410004417Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:48.410117Z", + "start_time": "2025-12-23T09:36:48.410261Z", + "end_time": "2025-12-23T09:36:48.468216Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 57 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:48.409800709Z", + "start_time": "2025-12-23T09:36:48.409852911Z", + "end_time": "2025-12-23T09:36:48.409919313Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + 
"wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:48.409760307Z", + "start_time": "2025-12-23T09:36:48.40982391Z", + "end_time": "2025-12-23T09:36:48.409854111Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 57, + "min_processing_ms": 57, + "max_processing_ms": 57, + "avg_processing_ms": 57, + "median_processing_ms": 57, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2772, + "slowest_section_id": 0, + "slowest_section_time_ms": 59 + } +} diff --git a/data/output/00803afa5f3db5412257ec1b619e92435141804f.json b/data/output/00803afa5f3db5412257ec1b619e92435141804f.json new file mode 100644 index 0000000..902fb22 --- /dev/null +++ b/data/output/00803afa5f3db5412257ec1b619e92435141804f.json @@ -0,0 +1,310 @@ +{ + "file_name": "00803afa5f3db5412257ec1b619e92435141804f.txt", + "total_words": 690, + "top_n_words": [ + { + "word": "the", + "count": 50 + }, + { + "word": "to", + "count": 23 + }, + { + "word": "putin", + "count": 18 + }, + { + "word": "a", + "count": 17 + }, 
+ { + "word": "and", + "count": 12 + }, + { + "word": "pope", + "count": 12 + }, + { + "word": "in", + "count": 10 + }, + { + "word": "berlusconi", + "count": 9 + }, + { + "word": "has", + "count": 9 + }, + { + "word": "is", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "S.", + "length": 2 + }, + { + "text": "In .", + "length": 4 + }, + { + "text": "5million bribe to a senator.", + "length": 28 + }, + { + "text": "push for military intervention.", + "length": 31 + }, + { + "text": "Putin is on a two-day visit in Italy.", + "length": 37 + }, + { + "text": "Russian president Vladimir Putin met the .", + "length": 42 + }, + { + "text": "Berlusconi as Russia's ambassador to the Vatican.", + "length": 49 + }, + { + "text": "Since then Berlusconi has been given several jail terms .", + "length": 57 + }, + { + "text": "Friends: The pair during a mid-air meeting in April 2004.", + "length": 57 + }, + { + "text": "with other international leaders to help the people of Syria.", + "length": 61 + }, + { + "text": "But the pair differ publicly in several areas including on gay rights.", + "length": 70 + }, + { + "text": "Pope today - amid claims he is planning to name scandal-mired Silvio .", + "length": 70 + }, + { + "text": "September the Pope wrote directly to Putin imploring him to co-operate .", + "length": 72 + }, + { + "text": "He will also meet with Italian President Giorgio Napolitano and Premier Enrico Letta .", + "length": 86 + }, + { + "text": "Pope Francis and Putin did agree on one thing, however - they were both against the British and U.", + "length": 98 + }, + { + "text": "Putin kisses the gift he presented to Pope Francis during the private audience at the Vatican City .", + "length": 100 + }, + { + "text": "So far the reports that Putin, left, could make Berlusconi, right, a Russian ambassador are unconfirmed .", + "length": 105 + }, + { + "text": "Cosy: Berlusconi and Putin are firm friends, despite both being controversial figures on the world 
stage .", + "length": 106 + }, + { + "text": "Russian church figures have accused the Vatican of trying to convert their worshippers to make them Catholic.", + "length": 109 + }, + { + "text": "Although the two have clashed over Syria, they agree that outside military intervention will escalate the civil war .", + "length": 117 + }, + { + "text": "Putin refused to take sides in the conflict, while the Pope said it was important that the violence did not escalate.", + "length": 117 + }, + { + "text": "The pair have defended each other through thick and thin, so the move is hardly surprising for the ever-controversial pair.", + "length": 123 + }, + { + "text": "While the Pope has reportedly taken a softer line, Putin has cracked down on gay rights and criminalised protests in favour.", + "length": 124 + }, + { + "text": "Pope Francis shakes hand with Russian President Vladimir Putin as they exchange gifts during the private audience at the Vatican City .", + "length": 135 + }, + { + "text": "In September the Pope wrote directly to Putin imploring him to co-operate with other international leaders to help the people of Syria .", + "length": 136 + }, + { + "text": "They have attended extreme fighting events together and Mr Putin even has a bed named after him at the 77-year-old billionaire’s mansion.", + "length": 139 + }, + { + "text": "Such a move would confer diplomatic immunity on Berlusconi – and an escape from his latest legal woes courtesy of his old pal in the Kremlin.", + "length": 143 + }, + { + "text": "Former KGB agent Mr Putin was last night arriving in Rome to meet Pope Francis and Italian ministers but he is also scheduled to meet Berlusconi.", + "length": 145 + }, + { + "text": "He is also appealing against a conviction  for sex with underage prostitute Ruby ‘the heart-stealer’, and faces a new trial for an alleged £2.", + "length": 148 + }, + { + "text": "The three-time premier had hoped for a last minute pardon but Italian President Giorgio Napolitano 
has made it clear that this is not on his agenda.", + "length": 148 + }, + { + "text": "The Russian president has attended the Italian’s infamous bunga bunga parties while Mr Putin reciprocated with holidays at his dacha on the Black Sea.", + "length": 152 + }, + { + "text": "Now their unlikely bond has taken a new turn, with Mr Putin said to be poised to make the former Italian prime minister the Russian envoy to the Vatican.", + "length": 153 + }, + { + "text": "And there is a long-running religious feud between the Vatican and the Russian Orthodox church, which have been at loggerheads since the fall of the Soviet Union.", + "length": 162 + }, + { + "text": "After being convicted of tax fraud, the politician is this week facing expulsion from the Italian parliament, a move that would strip him of some legal protection.", + "length": 163 + }, + { + "text": "’ Mr Putin held private talks with Pope Francis about the Syrian conflict, months after the Pope sent him an angry letter criticising the G20's failure to provide more aid.", + "length": 174 + }, + { + "text": "A political source told an Italian newspaper: ‘Putin’s idea is clear: he plans to nominate Berlusconi as ambassador to the Holy See, a strategy that would save him from persecution by the judiciary.", + "length": 202 + }, + { + "text": "Friends: It has been claimed Vladimir Putin, left, who met the Pope in Rome's Vatican City, reportedly wants to make disgraced Silvio Berlusconi, right, a Russian ambassador - giving him diplomatic immunity .", + "length": 208 + }, + { + "text": "As well as legal immunity, the  post would give Berlusconi a diplomatic passport, allowing him to visit his luxury overseas properties – something he has been denied since his old passport was  cancelled by the authorities.", + "length": 227 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.6688311100006104 + }, + "name_replaced": false, + "performance": { + "file_detection_time": 
"2025-12-23T09:36:48.910133689Z", + "first_section_created": "2025-12-23T09:36:48.911919561Z", + "last_section_published": "2025-12-23T09:36:48.912155671Z", + "all_results_received": "2025-12-23T09:36:48.974058968Z", + "output_generated": "2025-12-23T09:36:48.974242575Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 2, + "workers_processing_time_ms": 61, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:48.911919561Z", + "publish_time": "2025-12-23T09:36:48.912155671Z", + "first_worker_start": "2025-12-23T09:36:48.912705293Z", + "last_worker_end": "2025-12-23T09:36:48.973146Z", + "total_journey_time_ms": 61, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:48.912658891Z", + "start_time": "2025-12-23T09:36:48.912743195Z", + "end_time": "2025-12-23T09:36:48.912833098Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:48.912939Z", + "start_time": "2025-12-23T09:36:48.9131Z", + "end_time": "2025-12-23T09:36:48.973146Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 60 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:48.912653291Z", + "start_time": "2025-12-23T09:36:48.912726194Z", + "end_time": "2025-12-23T09:36:48.912824198Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:48.912581088Z", + "start_time": "2025-12-23T09:36:48.912705293Z", + "end_time": "2025-12-23T09:36:48.912760595Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, 
+ "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 60, + "min_processing_ms": 60, + "max_processing_ms": 60, + "avg_processing_ms": 60, + "median_processing_ms": 60, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 4134, + "slowest_section_id": 0, + "slowest_section_time_ms": 61 + } +} diff --git a/data/output/00804f0661053a33002efa237345781408354903.json b/data/output/00804f0661053a33002efa237345781408354903.json new file mode 100644 index 0000000..e045bbc --- /dev/null +++ b/data/output/00804f0661053a33002efa237345781408354903.json @@ -0,0 +1,302 @@ +{ + "file_name": "00804f0661053a33002efa237345781408354903.txt", + "total_words": 584, + "top_n_words": [ + { + "word": "the", + "count": 42 + }, + { + "word": "to", + "count": 26 + }, + { + "word": "a", + "count": 22 + }, + { + "word": "in", + "count": 15 + }, + { + "word": "murray", + "count": 14 + }, + { + "word": "janowicz", + "count": 10 + }, + { + "word": "3", + "count": 9 + }, + { + "word": "he", + "count": 9 + }, + { + "word": "of", + "count": 9 + }, + { + "word": "set", + "count": 9 + } + ], + "sorted_sentences": [ + { + "text": "4.", + "length": 2 + }, + { + "text": "1.", + "length": 2 + }, + { + "text": "2.", + "length": 2 + }, + { + "text": "3.", + "length": 2 + }, + { + "text": "Qualified .", + "length": 11 + }, + { + "text": "Paris - 1,000 .", + "length": 15 + }, 
+ { + "text": "Beijing - 500 .", + "length": 15 + }, + { + "text": "Shanghai - 1,000 .", + "length": 18 + }, + { + "text": "Marin Cilic (3,935) 6.", + "length": 22 + }, + { + "text": "David Ferrer (3,535) 8.", + "length": 23 + }, + { + "text": "Roger Federer - 7,020 .", + "length": 23 + }, + { + "text": "Milos Raonic (3,440) 10.", + "length": 24 + }, + { + "text": "Stan Wawrinka (4,795) 5.", + "length": 24 + }, + { + "text": "Kei Nishikori (3,845) 7.", + "length": 24 + }, + { + "text": "Tomas Berdych (3,510) 9.", + "length": 24 + }, + { + "text": "Novak Djokovic - 8,150 points .", + "length": 31 + }, + { + "text": "Rafael Nadal (6,645) Still to qualify .", + "length": 39 + }, + { + "text": "Andy Murray (3,405) Points he will be awarded if he wins the following: .", + "length": 73 + }, + { + "text": "The 27-year-old Scotsman looked in a relaxed mood during his clash against the Pole .", + "length": 85 + }, + { + "text": "Andy Murray followed up his recent triumph in the Shenzhen Open by defeating Jerzy Janowicz .", + "length": 95 + }, + { + "text": "Janowicz raced to a 3-0 advantage after he took the first of two break points on the Murray serve.", + "length": 98 + }, + { + "text": "Janowicz then moved 5-1 ahead but Murray hit back, claiming four games in a row to draw level at 5-5.", + "length": 101 + }, + { + "text": "After going 5-1 up only for Murray to hit back, the Pole went on to claim the first set on a tie-break.", + "length": 103 + }, + { + "text": "It meant that if he were to win, Murray would need to battle back from a set down for his third match in a row.", + "length": 111 + }, + { + "text": "The Scot won his first service game of the match to stay in the first set before a break of serve saw him claim back-to-back games.", + "length": 131 + }, + { + "text": "The second set went Murray's way thanks to a single break of Janowicz in the ninth game before he dominated the third to claim victory.", + "length": 135 + }, + { + "text": "Andy Murray fought 
from a set down to defeat Jerzy Janowicz 6-7 (11/9) 6-4 6-2 and book his place in the second round of the China Open.", + "length": 136 + }, + { + "text": "The second set went on serve until the ninth game, where the number six seed broke Janowicz to take a 5-4 lead before taking the set 6-4.", + "length": 137 + }, + { + "text": "However, while the Scot broke back immediately he was not able to hold his serve for the third consecutive game and slump to a 4-1 deficit.", + "length": 139 + }, + { + "text": "Murray started off in the worst possible fashion as he was broken by Janowicz in the first game of the match before the Pole claimed a 2-0 lead.", + "length": 144 + }, + { + "text": "Murray, pictured after winning the Shenzhen Open,  will be hoping to progress to the later rounds of Beijing, Shanghai and Paris to book a place in the ATP Finals .", + "length": 165 + }, + { + "text": "Murray then served for the match and claimed a battling victory in a match time of two hours and 28 minutes to line up a second-round meeting with American John Isner.", + "length": 167 + }, + { + "text": "It meant the game went into a decider and an early break for Murray saw him go 3-1 up, before another saw him move 5-2 ahead and just one game away from a place in the second round.", + "length": 181 + }, + { + "text": "Janowicz then served to love to take the first set to a tie-break, which was hotly contested with the duo matching each other all the way until the Pole found a way through to win 11-9.", + "length": 185 + }, + { + "text": "Murray served out the next before breaking Janowicz, although his momentum stuttered in the next, as he trailed 30-0 on serve before firing back to claim a 6-5 lead - the first time he was ahead in the match.", + "length": 208 + }, + { + "text": "Murray, who defeated Spain's Tommy Robredo on Sunday to win the Shenzhen Open - his first tournament victory since winning Wimbledon last year, looked out of sorts in the opening set before firing back to 
give Janowicz a real contest.", + "length": 234 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.5582074522972107 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:49.41295497Z", + "first_section_created": "2025-12-23T09:36:49.41419382Z", + "last_section_published": "2025-12-23T09:36:49.414365827Z", + "all_results_received": "2025-12-23T09:36:49.47766748Z", + "output_generated": "2025-12-23T09:36:49.477813086Z", + "total_processing_time_ms": 64, + "file_splitting_time_ms": 1, + "workers_processing_time_ms": 63, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:49.41419382Z", + "publish_time": "2025-12-23T09:36:49.414365827Z", + "first_worker_start": "2025-12-23T09:36:49.414913849Z", + "last_worker_end": "2025-12-23T09:36:49.476824Z", + "total_journey_time_ms": 62, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:49.414857647Z", + "start_time": "2025-12-23T09:36:49.414913849Z", + "end_time": "2025-12-23T09:36:49.414961651Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:49.415079Z", + "start_time": "2025-12-23T09:36:49.415214Z", + "end_time": "2025-12-23T09:36:49.476824Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 61 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:49.414857447Z", + "start_time": "2025-12-23T09:36:49.41492545Z", + "end_time": "2025-12-23T09:36:49.414989152Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:49.414883148Z", + "start_time": "2025-12-23T09:36:49.414948851Z", + "end_time": "2025-12-23T09:36:49.414975752Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": 
"sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 61, + "min_processing_ms": 61, + "max_processing_ms": 61, + "avg_processing_ms": 61, + "median_processing_ms": 61, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2964, + "slowest_section_id": 0, + "slowest_section_time_ms": 62 + } +} diff --git a/data/output/008078859169ea1d7bb07b8d68e78f1d44d1c480.json b/data/output/008078859169ea1d7bb07b8d68e78f1d44d1c480.json new file mode 100644 index 0000000..59965c9 --- /dev/null +++ b/data/output/008078859169ea1d7bb07b8d68e78f1d44d1c480.json @@ -0,0 +1,246 @@ +{ + "file_name": "008078859169ea1d7bb07b8d68e78f1d44d1c480.txt", + "total_words": 528, + "top_n_words": [ + { + "word": "the", + "count": 18 + }, + { + "word": "to", + "count": 16 + }, + { + "word": "a", + "count": 13 + }, + { + "word": "in", + "count": 13 + }, + { + "word": "and", + "count": 11 + }, + { + "word": "airbnb", + "count": 10 + }, + { + "word": "council", + "count": 10 + }, + { + "word": "has", + "count": 8 + }, + { + "word": "like", + "count": 7 + }, + { + "word": "of", + "count": 7 + } + ], + "sorted_sentences": [ + { + "text": "I immediately saw $1.", + "length": 
21 + }, + { + "text": "1 million plus an additional $110,000 a day.", + "length": 44 + }, + { + "text": "1 million and thought 'what am I going to do?", + "length": 45 + }, + { + "text": "She said her council said 'there's not much cooking involved there.", + "length": 67 + }, + { + "text": "' The registration costs and revenue tax varies from council to council.", + "length": 72 + }, + { + "text": "But the rapid rise to prominence has seen a phenomenon that has been deemed as 'growth pains.", + "length": 93 + }, + { + "text": "Since launching in 2008, Airbnb has since amassed over 500,000 listings in 33,000 cities and 192 countries.", + "length": 107 + }, + { + "text": "But the fines for continuing to rent unregistered short term accommodation are becoming ubiquitously severe.", + "length": 108 + }, + { + "text": "' Statton typically offered fruit, muesli and bread for guests to help themselves-rather than a hot breakfast.", + "length": 110 + }, + { + "text": "'Queensland and Victoria both already have clear rules that allow people to rent out their homes for any period of time,' he said.", + "length": 130 + }, + { + "text": "Residents are reporting being contacted by the local council because the accommodation they offer is not a legitimate bed and breakfast.", + "length": 136 + }, + { + "text": "This week, a study from Fairfax shows this phenomenon appears to have reached Australia, with local councils tightening up on unregistered rooms.", + "length": 146 + }, + { + "text": "Some of the rooms offered in Airbnb in Sydney: Since launching in 2008, Airbnb has since amassed over 500,000 listings in 33,000 cities and 192 countries .", + "length": 155 + }, + { + "text": "Planning Minister Pru Goward said NSW council has 'not caught up with the rapid development of the collaborative consumption market led by companies like Airbnb'.", + "length": 162 + }, + { + "text": "Councils in NSW are tightening up on sites like Airbnb by issuing hefty fines to those who offer 
unregistered rooms, pressuring them to register with their local council .", + "length": 172 + }, + { + "text": "Others have requested home owners become meet these standards in order to continue renting rooms-a process which often includes expensive renovations like fitting commercial kitchens.", + "length": 183 + }, + { + "text": "Newton local Lynn Statton said her local council told her 'If we approve you as a bed and breakfast you can in the future serve hot breakfast and you'll be needing a hygienic commercial kitchen.", + "length": 194 + }, + { + "text": "In a bid to counter the soaring popularity of short term accommodation websites, the NSW council have started handing out over $1 million to those offering unregistered rooms on sites like Airbnb.", + "length": 196 + }, + { + "text": "Planning Minister Pru Goward said NSW council has 'not caught up with the rapid development of the collaborative consumption market led by companies like Airbnb' 'It was quite a shocking letter to get.", + "length": 201 + }, + { + "text": "One resident called Shauna, who chose to keep her full name disclosed, told Fairfax she was contacted with a letter warning she has 10 days to say why her council should not take legal action against her, and that she is facing fines of f $1.", + "length": 242 + }, + { + "text": "' ' 'I feel like they're making the rules up as they go along because they have no clear guidelines yet,' Sam McDonagh, Airbnb's country manager for Australia and New Zealand, argued the framework in NSW  was not as clear as in other states.", + "length": 242 + }, + { + "text": "' According to an American study from Skift, Airbnb has made 'local municipalities eager for tax revenue, either in the form of income tax from the owners who use the site or from Airbnb based on arguments that it acts more like a hotel offering inventory than a classified advertising site.", + "length": 291 + } + ], + "sentiment": { + "sentiment": "neutral", + "score": 0.4645020663738251 + 
}, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:36:49.914655506Z", + "first_section_created": "2025-12-23T09:36:49.914987519Z", + "last_section_published": "2025-12-23T09:36:49.915161626Z", + "all_results_received": "2025-12-23T09:36:49.970178145Z", + "output_generated": "2025-12-23T09:36:49.970345252Z", + "total_processing_time_ms": 55, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 55, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:36:49.914987519Z", + "publish_time": "2025-12-23T09:36:49.915161626Z", + "first_worker_start": "2025-12-23T09:36:49.915674947Z", + "last_worker_end": "2025-12-23T09:36:49.969301Z", + "total_journey_time_ms": 54, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:36:49.915620745Z", + "start_time": "2025-12-23T09:36:49.915686847Z", + "end_time": "2025-12-23T09:36:49.91574585Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:36:49.916005Z", + "start_time": "2025-12-23T09:36:49.916143Z", + "end_time": "2025-12-23T09:36:49.969301Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 53 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:36:49.915686947Z", + "start_time": "2025-12-23T09:36:49.91574235Z", + "end_time": "2025-12-23T09:36:49.915823753Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:36:49.915612144Z", + "start_time": "2025-12-23T09:36:49.915674947Z", + "end_time": "2025-12-23T09:36:49.915705848Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + 
"avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 53, + "min_processing_ms": 53, + "max_processing_ms": 53, + "avg_processing_ms": 53, + "median_processing_ms": 53, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 3132, + "slowest_section_id": 0, + "slowest_section_time_ms": 54 + } +} diff --git a/data/output/performance-test.json b/data/output/performance-test.json new file mode 100644 index 0000000..77eea7d --- /dev/null +++ b/data/output/performance-test.json @@ -0,0 +1,178 @@ +{ + "file_name": "performance-test.txt", + "total_words": 32, + "top_n_words": [ + { + "word": "and", + "count": 2 + }, + { + "word": "the", + "count": 2 + }, + { + "word": "a", + "count": 1 + }, + { + "word": "architecture", + "count": 1 + }, + { + "word": "component", + "count": 1 + }, + { + "word": "distributed", + "count": 1 + }, + { + "word": "efficiently", + "count": 1 + }, + { + "word": "enables", + "count": 1 + }, + { + "word": "every", + "count": 1 + }, + { + "word": "file", + "count": 1 + } + ], + "sorted_sentences": [ + { + "text": "Every component performs well.", + "length": 30 + }, + { + "text": "Workers handle tasks quickly and reliably.", + "length": 42 + }, + { + "text": "This is a test file for performance tracking.", + "length": 45 + }, + { + "text": 
"The system works great and processes text efficiently.", + "length": 54 + }, + { + "text": "The distributed architecture enables parallel processing.", + "length": 57 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.7772926688194275 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:22:44.950864022Z", + "first_section_created": "2025-12-23T09:22:44.950914124Z", + "last_section_published": "2025-12-23T09:22:44.951143837Z", + "all_results_received": "2025-12-23T09:22:49.006658606Z", + "output_generated": "2025-12-23T09:22:49.006773813Z", + "total_processing_time_ms": 4055, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 4055, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:22:44.950914124Z", + "publish_time": "2025-12-23T09:22:44.951143837Z", + "first_worker_start": "2025-12-23T09:22:44.952241196Z", + "last_worker_end": "2025-12-23T09:22:49.005625Z", + "total_journey_time_ms": 4054, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:22:44.95213759Z", + "start_time": "2025-12-23T09:22:44.952290398Z", + "end_time": "2025-12-23T09:22:44.952333101Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:22:48.84523Z", + "start_time": "2025-12-23T09:22:48.845288Z", + "end_time": "2025-12-23T09:22:49.005625Z", + "queue_wait_time_ms": 3894, + "processing_time_ms": 160 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:22:44.952044685Z", + "start_time": "2025-12-23T09:22:44.952241196Z", + "end_time": "2025-12-23T09:22:44.952259797Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:22:44.95213489Z", + "start_time": "2025-12-23T09:22:44.952309299Z", + "end_time": 
"2025-12-23T09:22:44.9523161Z", + "queue_wait_time_ms": 1, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 160, + "min_processing_ms": 160, + "max_processing_ms": 160, + "avg_processing_ms": 160, + "median_processing_ms": 160, + "total_queue_wait_ms": 3894, + "avg_queue_wait_ms": 3894 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 1, + "avg_queue_wait_ms": 1 + } + }, + "total_sections": 1, + "average_section_size": 233, + "slowest_section_id": 0, + "slowest_section_time_ms": 4054 + } +} diff --git a/data/output/sample-russian.json b/data/output/sample-russian.json new file mode 100644 index 0000000..2e62b36 --- /dev/null +++ b/data/output/sample-russian.json @@ -0,0 +1,239 @@ +{ + "file_name": "sample-russian.txt", + "total_words": 110, + "top_n_words": [ + { + "word": "иванов", + "count": 4 + }, + { + "word": "иванова", + "count": 4 + }, + { + "word": "с", + "count": 4 + }, + { + "word": "для", + "count": 3 + }, + { + "word": "иван", + "count": 3 + }, + { + "word": "ивана", + "count": 3 + }, + { + "word": "были", + "count": 2 + }, + { + "word": "в", + "count": 2 + }, + { + "word": "все", + "count": 2 + }, + { + "word": "иванович", + "count": 2 + } + ], + 
"sorted_sentences": [ + { + "text": "Работа с Ивановым всегда продуктивна.", + "length": 69 + }, + { + "text": "Коллеги Иванова высоко ценят его опыт.", + "length": 70 + }, + { + "text": "Иван будет отвечать за архитектуру системы.", + "length": 80 + }, + { + "text": "Все были впечатлены техническими знаниями Ивана.", + "length": 90 + }, + { + "text": "Ивану удаётся находить решения для сложных задач.", + "length": 91 + }, + { + "text": "Презентация Ивана была хорошо принята аудиторией.", + "length": 92 + }, + { + "text": "Работа Иванова связана с распределёнными системами.", + "length": 96 + }, + { + "text": "Ивану Ивановичу нравится программирование на Go и Python.", + "length": 98 + }, + { + "text": "Все согласны, что Иванов - идеальный выбор для этой роли.", + "length": 102 + }, + { + "text": "Идеи Иванова о внедрении RabbitMQ были хорошо восприняты командой.", + "length": 115 + }, + { + "text": "В свободное время Иван Иванович любит читать технические книги.", + "length": 117 + }, + { + "text": "Недавно Иванов выступил с докладом на конференции разработчиков.", + "length": 120 + }, + { + "text": "Вчера Иванов встретился с коллегой для обсуждения нового проекта.", + "length": 121 + }, + { + "text": "Иванов Иван Иванович работает в крупной технологической компании.", + "length": 122 + }, + { + "text": "Руководитель попросил Иванова Ивана Ивановича возглавить новую инициативу.", + "length": 140 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.6746940016746521 + }, + "name_replaced": true, + "source_name": "Иванов Иван Иванович", + "target_name": "Смирнов Семён Семёнович", + "performance": { + "file_detection_time": "2025-12-23T09:22:45.451904363Z", + "first_section_created": "2025-12-23T09:22:45.451951466Z", + "last_section_published": "2025-12-23T09:22:45.45222288Z", + "all_results_received": "2025-12-23T09:22:49.237613325Z", + "output_generated": "2025-12-23T09:22:49.23771263Z", + "total_processing_time_ms": 3785, + "file_splitting_time_ms": 0, 
+ "workers_processing_time_ms": 3785, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:22:45.451951466Z", + "publish_time": "2025-12-23T09:22:45.45222288Z", + "first_worker_start": "2025-12-23T09:22:45.452899217Z", + "last_worker_end": "2025-12-23T09:22:49.236089Z", + "total_journey_time_ms": 3784, + "worker_timings": { + "namereplacement": { + "worker_type": "namereplacement", + "receive_time": "2025-12-23T09:22:45.452948619Z", + "start_time": "2025-12-23T09:22:45.45314433Z", + "end_time": "2025-12-23T09:22:45.458738431Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 5 + }, + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:22:45.452761409Z", + "start_time": "2025-12-23T09:22:45.452907117Z", + "end_time": "2025-12-23T09:22:45.453041124Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:22:48.963959Z", + "start_time": "2025-12-23T09:22:48.964016Z", + "end_time": "2025-12-23T09:22:49.236089Z", + "queue_wait_time_ms": 3511, + "processing_time_ms": 272 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:22:45.452811112Z", + "start_time": "2025-12-23T09:22:45.453045825Z", + "end_time": "2025-12-23T09:22:45.45313813Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:22:45.452719107Z", + "start_time": "2025-12-23T09:22:45.452899217Z", + "end_time": "2025-12-23T09:22:45.452972321Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "namereplacement": { + "worker_type": "namereplacement", + "total_sections": 1, + "total_processing_ms": 5, + "min_processing_ms": 5, + "max_processing_ms": 5, + "avg_processing_ms": 5, + "median_processing_ms": 5, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentencesort": { + 
"worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 272, + "min_processing_ms": 272, + "max_processing_ms": 272, + "avg_processing_ms": 272, + "median_processing_ms": 272, + "total_queue_wait_ms": 3511, + "avg_queue_wait_ms": 3511 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1537, + "slowest_section_id": 0, + "slowest_section_time_ms": 3784 + } +} diff --git a/data/output/sample-russian.txt b/data/output/sample-russian.txt new file mode 100644 index 0000000..4d857f0 --- /dev/null +++ b/data/output/sample-russian.txt @@ -0,0 +1,5 @@ +Смирнов Семён Семёнович работает в крупной технологической компании. Работа Смирнова связана с распределёнными системами. Семёну Семёновичу нравится программирование на Go и Python. +Вчера Смирнов встретился с коллегой для обсуждения нового проекта. Идеи Смирнова о внедрении RabbitMQ были хорошо восприняты командой. Все были впечатлены техническими знаниями Семёна. +Руководитель попросил Смирнова Семёна Семёновича возглавить новую инициативу. Семён будет отвечать за архитектуру системы. Все согласны, что Смирнов - идеальный выбор для этой роли. +В свободное время Семён Семёнович любит читать технические книги. 
Недавно Смирнов выступил с докладом на конференции разработчиков. Презентация Семёна была хорошо принята аудиторией. +Коллеги Смирнова высоко ценят его опыт. Работа с Смирновым всегда продуктивна. Семёну удаётся находить решения для сложных задач. \ No newline at end of file diff --git a/data/output/sample-with-name-replacement.json b/data/output/sample-with-name-replacement.json new file mode 100644 index 0000000..d4c24b5 --- /dev/null +++ b/data/output/sample-with-name-replacement.json @@ -0,0 +1,279 @@ +{ + "file_name": "sample-with-name-replacement.txt", + "total_words": 221, + "top_n_words": [ + { + "word": "the", + "count": 11 + }, + { + "word": "ivanov", + "count": 10 + }, + { + "word": "his", + "count": 8 + }, + { + "word": "and", + "count": 7 + }, + { + "word": "for", + "count": 7 + }, + { + "word": "to", + "count": 7 + }, + { + "word": "i", + "count": 6 + }, + { + "word": "ivan", + "count": 6 + }, + { + "word": "a", + "count": 5 + }, + { + "word": "is", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "I.", + "length": 2 + }, + { + "text": "I.", + "length": 2 + }, + { + "text": "I.", + "length": 2 + }, + { + "text": "I.", + "length": 2 + }, + { + "text": "Mr.", + "length": 3 + }, + { + "text": "Looking forward, I.", + "length": 19 + }, + { + "text": "to lead the new initiative.", + "length": 27 + }, + { + "text": "The project manager asked Ivanov I.", + "length": 35 + }, + { + "text": "The future looks bright for both Ivanov and his projects.", + "length": 57 + }, + { + "text": "He has been developing distributed systems for many years.", + "length": 58 + }, + { + "text": "IVANOV has contributed significantly to open-source projects.", + "length": 61 + }, + { + "text": "The presentation by Ivanov was well-received by the audience.", + "length": 61 + }, + { + "text": "Ivanov presented his ideas about implementing a RabbitMQ-based solution.", + "length": 72 + }, + { + "text": "Ivanov plans to continue contributing to the field of distributed 
systems.", + "length": 74 + }, + { + "text": "The team was impressed by Ivan's technical knowledge and problem-solving skills.", + "length": 80 + }, + { + "text": "His GitHub profile shows numerous repositories related to distributed computing.", + "length": 80 + }, + { + "text": "Everyone agrees that Ivanov Ivan is the perfect choice for this challenging role.", + "length": 81 + }, + { + "text": "Yesterday, Ivanov met with his colleague to discuss the new project requirements.", + "length": 81 + }, + { + "text": "Ivan will be responsible for architecting the system and mentoring junior developers.", + "length": 85 + }, + { + "text": "He recently gave a talk about scalable architectures at a major developer conference.", + "length": 85 + }, + { + "text": "His passion for technology and dedication to excellence make him an invaluable team member.", + "length": 91 + }, + { + "text": "In his personal life, Ivan Ivanovich enjoys reading technical books and attending conferences.", + "length": 94 + }, + { + "text": "Ivanov Ivan Ivanovich is a talented software engineer who works at a major technology company.", + "length": 94 + }, + { + "text": "Ivan Ivanovich is known for his expertise in message queue systems and microservices architecture.", + "length": 98 + }, + { + "text": "Ivanov is respected in the developer community for his clear documentation and helpful code reviews.", + "length": 100 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.591088056564331 + }, + "name_replaced": true, + "source_name": "Ivanov Ivan Ivanovich", + "target_name": "Smirnov Semyon Semyonovich", + "performance": { + "file_detection_time": "2025-12-23T09:22:45.952981107Z", + "first_section_created": "2025-12-23T09:22:45.953025009Z", + "last_section_published": "2025-12-23T09:22:45.953213419Z", + "all_results_received": "2025-12-23T09:22:49.062128689Z", + "output_generated": "2025-12-23T09:22:49.062350701Z", + "total_processing_time_ms": 3109, + "file_splitting_time_ms": 0, 
+ "workers_processing_time_ms": 3108, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:22:45.953025009Z", + "publish_time": "2025-12-23T09:22:45.953213419Z", + "first_worker_start": "2025-12-23T09:22:45.953737847Z", + "last_worker_end": "2025-12-23T09:22:49.060655Z", + "total_journey_time_ms": 3107, + "worker_timings": { + "namereplacement": { + "worker_type": "namereplacement", + "receive_time": "2025-12-23T09:22:45.953742448Z", + "start_time": "2025-12-23T09:22:45.953904856Z", + "end_time": "2025-12-23T09:22:45.954194472Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:22:45.953706846Z", + "start_time": "2025-12-23T09:22:45.953771749Z", + "end_time": "2025-12-23T09:22:45.953855154Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:22:49.0066Z", + "start_time": "2025-12-23T09:22:49.0067Z", + "end_time": "2025-12-23T09:22:49.060655Z", + "queue_wait_time_ms": 3053, + "processing_time_ms": 53 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:22:45.953756148Z", + "start_time": "2025-12-23T09:22:45.953811451Z", + "end_time": "2025-12-23T09:22:45.953867654Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:22:45.953685045Z", + "start_time": "2025-12-23T09:22:45.953737847Z", + "end_time": "2025-12-23T09:22:45.953757048Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "namereplacement": { + "worker_type": "namereplacement", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentencesort": { + 
"worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 53, + "min_processing_ms": 53, + "max_processing_ms": 53, + "avg_processing_ms": 53, + "median_processing_ms": 53, + "total_queue_wait_ms": 3053, + "avg_queue_wait_ms": 3053 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1467, + "slowest_section_id": 0, + "slowest_section_time_ms": 3107 + } +} diff --git a/data/output/sample-with-name-replacement.txt b/data/output/sample-with-name-replacement.txt new file mode 100644 index 0000000..9709cef --- /dev/null +++ b/data/output/sample-with-name-replacement.txt @@ -0,0 +1,6 @@ +Smirnov semyon semyonovich is a talented software engineer who works at a major technology company. He has been developing distributed systems for many years. Semyon semyonovich is known for his expertise in message queue systems and microservices architecture. +Yesterday, Smirnov met with his colleague to discuss the new project requirements. Mr. Smirnov presented his ideas about implementing a RabbitMQ-based solution. The team was impressed by Ivan's technical knowledge and problem-solving skills. +SMIRNOV has contributed significantly to open-source projects. 
His GitHub profile shows numerous repositories related to distributed computing. S. s. smirnov is respected in the developer community for his clear documentation and helpful code reviews. +The project manager asked Smirnov I. I. to lead the new initiative. Ivan will be responsible for architecting the system and mentoring junior developers. Everyone agrees that Smirnov semyon is the perfect choice for this challenging role. +In his personal life, Semyon semyonovich enjoys reading technical books and attending conferences. He recently gave a talk about scalable architectures at a major developer conference. The presentation by Smirnov was well-received by the audience. +Looking forward, S. s. smirnov plans to continue contributing to the field of distributed systems. His passion for technology and dedication to excellence make him an invaluable team member. The future looks bright for both Smirnov and his projects. \ No newline at end of file diff --git a/data/output/sample1.json b/data/output/sample1.json new file mode 100644 index 0000000..37255d5 --- /dev/null +++ b/data/output/sample1.json @@ -0,0 +1,258 @@ +{ + "file_name": "sample1.txt", + "total_words": 295, + "top_n_words": [ + { + "word": "the", + "count": 16 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "text", + "count": 6 + }, + { + "word": "system", + "count": 5 + }, + { + "word": "is", + "count": 4 + }, + { + "word": "of", + "count": 4 + }, + { + "word": "processing", + "count": 4 + }, + { + "word": "this", + "count": 4 + }, + { + "word": "to", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "Innovation in this field drives progress across many industries.", + "length": 64 + }, + { + "text": "Modern distributed systems must be fault-tolerant and resilient.", + "length": 64 + }, + { + "text": "It waits for all sections to be processed before merging the data.", + "length": 66 + }, + { + "text": "Performance optimization is essential for 
handling large datasets.", + "length": 66 + }, + { + "text": "This system processes text data in parallel using multiple workers.", + "length": 67 + }, + { + "text": "The word count worker analyzes the number of words in each section.", + "length": 67 + }, + { + "text": "The top-N words worker identifies the most frequently occurring terms.", + "length": 70 + }, + { + "text": "Sentiment analysis is crucial for understanding the emotional tone of text.", + "length": 75 + }, + { + "text": "The system splits text into manageable sections, enabling parallel processing.", + "length": 78 + }, + { + "text": "This architecture supports horizontal scaling by adding more worker instances.", + "length": 78 + }, + { + "text": "Load balancing ensures that work is distributed evenly among available workers.", + "length": 79 + }, + { + "text": "Monitoring and logging help identify bottlenecks and improve system performance.", + "length": 80 + }, + { + "text": "The aggregator component plays a vital role in combining results from all workers.", + "length": 82 + }, + { + "text": "Docker containers provide isolation and consistency across different environments.", + "length": 82 + }, + { + "text": "Natural language processing continues to evolve with new techniques and approaches.", + "length": 83 + }, + { + "text": "Docker Compose orchestrates multiple services, making deployment simple and repeatable.", + "length": 87 + }, + { + "text": "Cloud-native architectures enable systems to scale dynamically based on workload demands.", + "length": 89 + }, + { + "text": "It demonstrates the power of microservices architecture and message-driven design patterns.", + "length": 91 + }, + { + "text": "This capability has numerous applications in business intelligence and social media monitoring.", + "length": 95 + }, + { + "text": "The distributed text processing system is an amazing achievement in modern software engineering.", + "length": 96 + }, + { + "text": "The future of text processing 
lies in combining traditional algorithms with deep learning models.", + "length": 97 + }, + { + "text": "Each worker specializes in a specific task, making the overall system more efficient and scalable.", + "length": 98 + }, + { + "text": "RabbitMQ enables efficient communication between different components, allowing them to work together seamlessly.", + "length": 113 + }, + { + "text": "The merge operation ensures that results are combined in the correct order, maintaining data integrity throughout the pipeline.", + "length": 127 + }, + { + "text": "Using advanced machine learning models from Hugging Face, we can determine whether the text expresses positive, negative, or neutral sentiment.", + "length": 143 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.7073483467102051 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:22:46.454020848Z", + "first_section_created": "2025-12-23T09:22:46.454073651Z", + "last_section_published": "2025-12-23T09:22:46.454301663Z", + "all_results_received": "2025-12-23T09:22:49.117083844Z", + "output_generated": "2025-12-23T09:22:49.117320557Z", + "total_processing_time_ms": 2663, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 2662, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:22:46.454073651Z", + "publish_time": "2025-12-23T09:22:46.454301663Z", + "first_worker_start": "2025-12-23T09:22:46.454940898Z", + "last_worker_end": "2025-12-23T09:22:49.116242Z", + "total_journey_time_ms": 2662, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:22:46.454865894Z", + "start_time": "2025-12-23T09:22:46.454940898Z", + "end_time": "2025-12-23T09:22:46.454994901Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:22:49.062164Z", + "start_time": 
"2025-12-23T09:22:49.062238Z", + "end_time": "2025-12-23T09:22:49.116242Z", + "queue_wait_time_ms": 2607, + "processing_time_ms": 54 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:22:46.454873294Z", + "start_time": "2025-12-23T09:22:46.454970099Z", + "end_time": "2025-12-23T09:22:46.455096206Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:22:46.454861493Z", + "start_time": "2025-12-23T09:22:46.454963899Z", + "end_time": "2025-12-23T09:22:46.455024002Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 54, + "min_processing_ms": 54, + "max_processing_ms": 54, + "avg_processing_ms": 54, + "median_processing_ms": 54, + "total_queue_wait_ms": 2607, + "avg_queue_wait_ms": 2607 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2168, + "slowest_section_id": 0, + "slowest_section_time_ms": 2662 + } +} diff --git a/data/output/sample2.json b/data/output/sample2.json new file mode 100644 index 0000000..60b23d2 --- /dev/null +++ 
b/data/output/sample2.json @@ -0,0 +1,250 @@ +{ + "file_name": "sample2.txt", + "total_words": 184, + "top_n_words": [ + { + "word": "the", + "count": 7 + }, + { + "word": "to", + "count": 6 + }, + { + "word": "and", + "count": 5 + }, + { + "word": "development", + "count": 4 + }, + { + "word": "of", + "count": 4 + }, + { + "word": "is", + "count": 3 + }, + { + "word": "work", + "count": 3 + }, + { + "word": "communication", + "count": 2 + }, + { + "word": "developers", + "count": 2 + }, + { + "word": "difficult", + "count": 2 + } + ], + "sorted_sentences": [ + { + "text": "These practices lead to better outcomes.", + "length": 40 + }, + { + "text": "The pace of innovation never slows down.", + "length": 40 + }, + { + "text": "Unfortunately, not all projects succeed.", + "length": 40 + }, + { + "text": "Issue trackers organize tasks and priorities.", + "length": 45 + }, + { + "text": "Code reviews improve quality and share knowledge.", + "length": 49 + }, + { + "text": "Technical debt accumulates when shortcuts are taken.", + "length": 52 + }, + { + "text": "Maintenance becomes increasingly difficult over time.", + "length": 53 + }, + { + "text": "Many fail due to poor planning or communication issues.", + "length": 55 + }, + { + "text": "However, success is achievable with the right approach.", + "length": 55 + }, + { + "text": "Remote work has become the norm for many organizations.", + "length": 55 + }, + { + "text": "Seeing users benefit from your work provides motivation.", + "length": 56 + }, + { + "text": "These challenges test the resilience of development teams.", + "length": 58 + }, + { + "text": "Software development is a challenging but rewarding field.", + "length": 58 + }, + { + "text": "Version control systems like Git enable parallel development.", + "length": 61 + }, + { + "text": "Collaboration tools have transformed how teams work together.", + "length": 61 + }, + { + "text": "Automated testing catches bugs early in the development cycle.", + 
"length": 62 + }, + { + "text": "This sense of purpose drives developers to excel in their craft.", + "length": 64 + }, + { + "text": "Good documentation helps team members understand complex systems.", + "length": 65 + }, + { + "text": "Communication platforms facilitate discussion and decision-making.", + "length": 66 + }, + { + "text": "Every day brings new problems to solve and opportunities to learn.", + "length": 66 + }, + { + "text": "Building something that improves peoples lives is incredibly fulfilling.", + "length": 72 + }, + { + "text": "The satisfaction of solving difficult problems makes the effort worthwhile.", + "length": 75 + }, + { + "text": "Developers must constantly adapt to changing technologies and methodologies.", + "length": 76 + } + ], + "sentiment": { + "sentiment": "negative", + "score": 0.8142186999320984 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:22:46.955100792Z", + "first_section_created": "2025-12-23T09:22:46.955151695Z", + "last_section_published": "2025-12-23T09:22:46.955405308Z", + "all_results_received": "2025-12-23T09:22:49.171586475Z", + "output_generated": "2025-12-23T09:22:49.171830988Z", + "total_processing_time_ms": 2216, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 2216, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:22:46.955151695Z", + "publish_time": "2025-12-23T09:22:46.955405308Z", + "first_worker_start": "2025-12-23T09:22:46.956029342Z", + "last_worker_end": "2025-12-23T09:22:49.170684Z", + "total_journey_time_ms": 2215, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:22:46.955969438Z", + "start_time": "2025-12-23T09:22:46.956029342Z", + "end_time": "2025-12-23T09:22:46.956056543Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": 
"2025-12-23T09:22:49.117049Z", + "start_time": "2025-12-23T09:22:49.117151Z", + "end_time": "2025-12-23T09:22:49.170684Z", + "queue_wait_time_ms": 2161, + "processing_time_ms": 53 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:22:46.955967938Z", + "start_time": "2025-12-23T09:22:46.956059743Z", + "end_time": "2025-12-23T09:22:46.956134047Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:22:46.956050043Z", + "start_time": "2025-12-23T09:22:46.956102646Z", + "end_time": "2025-12-23T09:22:46.956122147Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + "total_processing_ms": 53, + "min_processing_ms": 53, + "max_processing_ms": 53, + "avg_processing_ms": 53, + "median_processing_ms": 53, + "total_queue_wait_ms": 2161, + "avg_queue_wait_ms": 2161 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 1351, + "slowest_section_id": 0, + "slowest_section_time_ms": 2215 + } +} diff --git a/data/output/test-performance.json b/data/output/test-performance.json new file mode 
100644 index 0000000..056bdbe --- /dev/null +++ b/data/output/test-performance.json @@ -0,0 +1,258 @@ +{ + "file_name": "test-performance.txt", + "total_words": 295, + "top_n_words": [ + { + "word": "the", + "count": 16 + }, + { + "word": "and", + "count": 9 + }, + { + "word": "in", + "count": 9 + }, + { + "word": "text", + "count": 6 + }, + { + "word": "system", + "count": 5 + }, + { + "word": "is", + "count": 4 + }, + { + "word": "of", + "count": 4 + }, + { + "word": "processing", + "count": 4 + }, + { + "word": "this", + "count": 4 + }, + { + "word": "to", + "count": 4 + } + ], + "sorted_sentences": [ + { + "text": "Innovation in this field drives progress across many industries.", + "length": 64 + }, + { + "text": "Modern distributed systems must be fault-tolerant and resilient.", + "length": 64 + }, + { + "text": "It waits for all sections to be processed before merging the data.", + "length": 66 + }, + { + "text": "Performance optimization is essential for handling large datasets.", + "length": 66 + }, + { + "text": "This system processes text data in parallel using multiple workers.", + "length": 67 + }, + { + "text": "The word count worker analyzes the number of words in each section.", + "length": 67 + }, + { + "text": "The top-N words worker identifies the most frequently occurring terms.", + "length": 70 + }, + { + "text": "Sentiment analysis is crucial for understanding the emotional tone of text.", + "length": 75 + }, + { + "text": "The system splits text into manageable sections, enabling parallel processing.", + "length": 78 + }, + { + "text": "This architecture supports horizontal scaling by adding more worker instances.", + "length": 78 + }, + { + "text": "Load balancing ensures that work is distributed evenly among available workers.", + "length": 79 + }, + { + "text": "Monitoring and logging help identify bottlenecks and improve system performance.", + "length": 80 + }, + { + "text": "The aggregator component plays a vital role in combining 
results from all workers.", + "length": 82 + }, + { + "text": "Docker containers provide isolation and consistency across different environments.", + "length": 82 + }, + { + "text": "Natural language processing continues to evolve with new techniques and approaches.", + "length": 83 + }, + { + "text": "Docker Compose orchestrates multiple services, making deployment simple and repeatable.", + "length": 87 + }, + { + "text": "Cloud-native architectures enable systems to scale dynamically based on workload demands.", + "length": 89 + }, + { + "text": "It demonstrates the power of microservices architecture and message-driven design patterns.", + "length": 91 + }, + { + "text": "This capability has numerous applications in business intelligence and social media monitoring.", + "length": 95 + }, + { + "text": "The distributed text processing system is an amazing achievement in modern software engineering.", + "length": 96 + }, + { + "text": "The future of text processing lies in combining traditional algorithms with deep learning models.", + "length": 97 + }, + { + "text": "Each worker specializes in a specific task, making the overall system more efficient and scalable.", + "length": 98 + }, + { + "text": "RabbitMQ enables efficient communication between different components, allowing them to work together seamlessly.", + "length": 113 + }, + { + "text": "The merge operation ensures that results are combined in the correct order, maintaining data integrity throughout the pipeline.", + "length": 127 + }, + { + "text": "Using advanced machine learning models from Hugging Face, we can determine whether the text expresses positive, negative, or neutral sentiment.", + "length": 143 + } + ], + "sentiment": { + "sentiment": "positive", + "score": 0.7073483467102051 + }, + "name_replaced": false, + "performance": { + "file_detection_time": "2025-12-23T09:22:47.455919921Z", + "first_section_created": "2025-12-23T09:22:47.455967324Z", + "last_section_published": 
"2025-12-23T09:22:47.456353045Z", + "all_results_received": "2025-12-23T09:22:49.243193225Z", + "output_generated": "2025-12-23T09:22:49.243412237Z", + "total_processing_time_ms": 1787, + "file_splitting_time_ms": 0, + "workers_processing_time_ms": 1786, + "aggregation_wait_time_ms": 0, + "section_journeys": [ + { + "section_id": 0, + "creation_time": "2025-12-23T09:22:47.455967324Z", + "publish_time": "2025-12-23T09:22:47.456353045Z", + "first_worker_start": "2025-12-23T09:22:47.456811169Z", + "last_worker_end": "2025-12-23T09:22:49.242365Z", + "total_journey_time_ms": 1786, + "worker_timings": { + "sentencesort": { + "worker_type": "sentencesort", + "receive_time": "2025-12-23T09:22:47.456756666Z", + "start_time": "2025-12-23T09:22:47.456811169Z", + "end_time": "2025-12-23T09:22:47.456855172Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "receive_time": "2025-12-23T09:22:49.171678Z", + "start_time": "2025-12-23T09:22:49.171765Z", + "end_time": "2025-12-23T09:22:49.242365Z", + "queue_wait_time_ms": 1715, + "processing_time_ms": 70 + }, + "topn": { + "worker_type": "topn", + "receive_time": "2025-12-23T09:22:47.456789868Z", + "start_time": "2025-12-23T09:22:47.456860872Z", + "end_time": "2025-12-23T09:22:47.45701208Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "receive_time": "2025-12-23T09:22:47.456727865Z", + "start_time": "2025-12-23T09:22:47.45682917Z", + "end_time": "2025-12-23T09:22:47.456861072Z", + "queue_wait_time_ms": 0, + "processing_time_ms": 0 + } + } + } + ], + "worker_stats": { + "sentencesort": { + "worker_type": "sentencesort", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "sentiment": { + "worker_type": "sentiment", + "total_sections": 1, + 
"total_processing_ms": 70, + "min_processing_ms": 70, + "max_processing_ms": 70, + "avg_processing_ms": 70, + "median_processing_ms": 70, + "total_queue_wait_ms": 1715, + "avg_queue_wait_ms": 1715 + }, + "topn": { + "worker_type": "topn", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + }, + "wordcount": { + "worker_type": "wordcount", + "total_sections": 1, + "total_processing_ms": 0, + "min_processing_ms": 0, + "max_processing_ms": 0, + "avg_processing_ms": 0, + "median_processing_ms": 0, + "total_queue_wait_ms": 0, + "avg_queue_wait_ms": 0 + } + }, + "total_sections": 1, + "average_section_size": 2168, + "slowest_section_id": 0, + "slowest_section_time_ms": 1786 + } +} diff --git a/docker-compose.yml b/docker-compose.yml new file mode 100644 index 0000000..6fecf46 --- /dev/null +++ b/docker-compose.yml @@ -0,0 +1,218 @@ +version: '3.8' + +services: + rabbitmq: + image: rabbitmq:3.12-management-alpine + container_name: rabbitmq + ports: + - "5672:5672" + - "15672:15672" + environment: + RABBITMQ_DEFAULT_USER: guest + RABBITMQ_DEFAULT_PASS: guest + healthcheck: + test: ["CMD", "rabbitmq-diagnostics", "ping"] + interval: 10s + timeout: 5s + retries: 5 + networks: + - text-processing + + producer: + build: + context: . + dockerfile: Dockerfile.go + args: + SERVICE_NAME: producer + container_name: producer + depends_on: + rabbitmq: + condition: service_healthy + volumes: + - ${CONFIG_FILE}:/config/config.json:ro + - ${INPUT_DIR}:/data/input:ro + networks: + - text-processing + restart: unless-stopped + + aggregator: + build: + context: . 
+ dockerfile: Dockerfile.go + args: + SERVICE_NAME: aggregator + container_name: aggregator + depends_on: + rabbitmq: + condition: service_healthy + volumes: + - ${CONFIG_FILE}:/config/config.json:ro + - ${OUTPUT_DIR}:/data/output + networks: + - text-processing + restart: unless-stopped + + # Word Count Workers (2 instances as per config) + wordcount-worker-1: + build: + context: . + dockerfile: Dockerfile.go + args: + SERVICE_NAME: workers/wordcount + container_name: wordcount-worker-1 + depends_on: + rabbitmq: + condition: service_healthy + volumes: + - ${CONFIG_FILE}:/config/config.json:ro + networks: + - text-processing + restart: unless-stopped + + wordcount-worker-2: + build: + context: . + dockerfile: Dockerfile.go + args: + SERVICE_NAME: workers/wordcount + container_name: wordcount-worker-2 + depends_on: + rabbitmq: + condition: service_healthy + volumes: + - ${CONFIG_FILE}:/config/config.json:ro + networks: + - text-processing + restart: unless-stopped + + # Top-N Words Workers (2 instances as per config) + topn-worker-1: + build: + context: . + dockerfile: Dockerfile.go + args: + SERVICE_NAME: workers/topn + container_name: topn-worker-1 + depends_on: + rabbitmq: + condition: service_healthy + volumes: + - ${CONFIG_FILE}:/config/config.json:ro + networks: + - text-processing + restart: unless-stopped + + topn-worker-2: + build: + context: . + dockerfile: Dockerfile.go + args: + SERVICE_NAME: workers/topn + container_name: topn-worker-2 + depends_on: + rabbitmq: + condition: service_healthy + volumes: + - ${CONFIG_FILE}:/config/config.json:ro + networks: + - text-processing + restart: unless-stopped + + # Sentence Sort Workers (2 instances as per config) + sentencesort-worker-1: + build: + context: . 
+ dockerfile: Dockerfile.go + args: + SERVICE_NAME: workers/sentencesort + container_name: sentencesort-worker-1 + depends_on: + rabbitmq: + condition: service_healthy + volumes: + - ${CONFIG_FILE}:/config/config.json:ro + networks: + - text-processing + restart: unless-stopped + + sentencesort-worker-2: + build: + context: . + dockerfile: Dockerfile.go + args: + SERVICE_NAME: workers/sentencesort + container_name: sentencesort-worker-2 + depends_on: + rabbitmq: + condition: service_healthy + volumes: + - ${CONFIG_FILE}:/config/config.json:ro + networks: + - text-processing + restart: unless-stopped + + # Sentiment Analysis Workers (2 instances as per config) + sentiment-worker-1: + build: + context: . + dockerfile: Dockerfile.python + container_name: sentiment-worker-1 + depends_on: + rabbitmq: + condition: service_healthy + volumes: + - ${CONFIG_FILE}:/config/config.json:ro + networks: + - text-processing + restart: unless-stopped + + sentiment-worker-2: + build: + context: . + dockerfile: Dockerfile.python + container_name: sentiment-worker-2 + depends_on: + rabbitmq: + condition: service_healthy + volumes: + - ${CONFIG_FILE}:/config/config.json:ro + networks: + - text-processing + restart: unless-stopped + + # Name Replacement Workers (2 instances as per config) + namereplacement-worker-1: + build: + context: . + dockerfile: Dockerfile.go + args: + SERVICE_NAME: workers/namereplacement + container_name: namereplacement-worker-1 + depends_on: + rabbitmq: + condition: service_healthy + volumes: + - ${CONFIG_FILE}:/config/config.json:ro + networks: + - text-processing + restart: unless-stopped + + namereplacement-worker-2: + build: + context: . 
+ dockerfile: Dockerfile.go + args: + SERVICE_NAME: workers/namereplacement + container_name: namereplacement-worker-2 + depends_on: + rabbitmq: + condition: service_healthy + volumes: + - ${CONFIG_FILE}:/config/config.json:ro + networks: + - text-processing + restart: unless-stopped + +networks: + text-processing: + driver: bridge diff --git a/gradle/wrapper/gradle-wrapper.jar b/gradle/wrapper/gradle-wrapper.jar deleted file mode 100644 index 7f93135..0000000 Binary files a/gradle/wrapper/gradle-wrapper.jar and /dev/null differ diff --git a/gradle/wrapper/gradle-wrapper.properties b/gradle/wrapper/gradle-wrapper.properties deleted file mode 100644 index 3fa8f86..0000000 --- a/gradle/wrapper/gradle-wrapper.properties +++ /dev/null @@ -1,7 +0,0 @@ -distributionBase=GRADLE_USER_HOME -distributionPath=wrapper/dists -distributionUrl=https\://services.gradle.org/distributions/gradle-8.4-bin.zip -networkTimeout=10000 -validateDistributionUrl=true -zipStoreBase=GRADLE_USER_HOME -zipStorePath=wrapper/dists diff --git a/gradlew b/gradlew deleted file mode 100755 index 1aa94a4..0000000 --- a/gradlew +++ /dev/null @@ -1,249 +0,0 @@ -#!/bin/sh - -# -# Copyright © 2015-2021 the original authors. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# https://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -############################################################################## -# -# Gradle start up script for POSIX generated by Gradle. -# -# Important for running: -# -# (1) You need a POSIX-compliant shell to run this script. 
If your /bin/sh is -# noncompliant, but you have some other compliant shell such as ksh or -# bash, then to run this script, type that shell name before the whole -# command line, like: -# -# ksh Gradle -# -# Busybox and similar reduced shells will NOT work, because this script -# requires all of these POSIX shell features: -# * functions; -# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», -# «${var#prefix}», «${var%suffix}», and «$( cmd )»; -# * compound commands having a testable exit status, especially «case»; -# * various built-in commands including «command», «set», and «ulimit». -# -# Important for patching: -# -# (2) This script targets any POSIX shell, so it avoids extensions provided -# by Bash, Ksh, etc; in particular arrays are avoided. -# -# The "traditional" practice of packing multiple parameters into a -# space-separated string is a well documented source of bugs and security -# problems, so this is (mostly) avoided, by progressively accumulating -# options in "$@", and eventually passing that to Java. -# -# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, -# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; -# see the in-line comments for details. -# -# There are tweaks for specific operating systems such as AIX, CygWin, -# Darwin, MinGW, and NonStop. -# -# (3) This script is generated from the Groovy template -# https://github.com/gradle/gradle/blob/HEAD/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt -# within the Gradle project. -# -# You can find Gradle at https://github.com/gradle/gradle/. -# -############################################################################## - -# Attempt to set APP_HOME - -# Resolve links: $0 may be a link -app_path=$0 - -# Need this for daisy-chained symlinks. 
-while - APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path - [ -h "$app_path" ] -do - ls=$( ls -ld "$app_path" ) - link=${ls#*' -> '} - case $link in #( - /*) app_path=$link ;; #( - *) app_path=$APP_HOME$link ;; - esac -done - -# This is normally unused -# shellcheck disable=SC2034 -APP_BASE_NAME=${0##*/} -# Discard cd standard output in case $CDPATH is set (https://github.com/gradle/gradle/issues/25036) -APP_HOME=$( cd "${APP_HOME:-./}" > /dev/null && pwd -P ) || exit - -# Use the maximum available, or set MAX_FD != -1 to use that value. -MAX_FD=maximum - -warn () { - echo "$*" -} >&2 - -die () { - echo - echo "$*" - echo - exit 1 -} >&2 - -# OS specific support (must be 'true' or 'false'). -cygwin=false -msys=false -darwin=false -nonstop=false -case "$( uname )" in #( - CYGWIN* ) cygwin=true ;; #( - Darwin* ) darwin=true ;; #( - MSYS* | MINGW* ) msys=true ;; #( - NONSTOP* ) nonstop=true ;; -esac - -CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar - - -# Determine the Java command to use to start the JVM. -if [ -n "$JAVA_HOME" ] ; then - if [ -x "$JAVA_HOME/jre/sh/java" ] ; then - # IBM's JDK on AIX uses strange locations for the executables - JAVACMD=$JAVA_HOME/jre/sh/java - else - JAVACMD=$JAVA_HOME/bin/java - fi - if [ ! -x "$JAVACMD" ] ; then - die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." - fi -else - JAVACMD=java - if ! command -v java >/dev/null 2>&1 - then - die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. - -Please set the JAVA_HOME variable in your environment to match the -location of your Java installation." - fi -fi - -# Increase the maximum file descriptors if we can. -if ! "$cygwin" && ! "$darwin" && ! "$nonstop" ; then - case $MAX_FD in #( - max*) - # In POSIX sh, ulimit -H is undefined. That's why the result is checked to see if it worked. 
- # shellcheck disable=SC2039,SC3045 - MAX_FD=$( ulimit -H -n ) || - warn "Could not query maximum file descriptor limit" - esac - case $MAX_FD in #( - '' | soft) :;; #( - *) - # In POSIX sh, ulimit -n is undefined. That's why the result is checked to see if it worked. - # shellcheck disable=SC2039,SC3045 - ulimit -n "$MAX_FD" || - warn "Could not set maximum file descriptor limit to $MAX_FD" - esac -fi - -# Collect all arguments for the java command, stacking in reverse order: -# * args from the command line -# * the main class name -# * -classpath -# * -D...appname settings -# * --module-path (only if needed) -# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. - -# For Cygwin or MSYS, switch paths to Windows format before running java -if "$cygwin" || "$msys" ; then - APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) - CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) - - JAVACMD=$( cygpath --unix "$JAVACMD" ) - - # Now convert the arguments - kludge to limit ourselves to /bin/sh - for arg do - if - case $arg in #( - -*) false ;; # don't mess with options #( - /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath - [ -e "$t" ] ;; #( - *) false ;; - esac - then - arg=$( cygpath --path --ignore --mixed "$arg" ) - fi - # Roll the args list around exactly as many times as the number of - # args, so each arg winds up back in the position where it started, but - # possibly modified. - # - # NB: a `for` loop captures its iteration list before it begins, so - # changing the positional parameters here affects neither the number of - # iterations, nor the values presented in `arg`. - shift # remove old arg - set -- "$@" "$arg" # push replacement arg - done -fi - - -# Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. 
-DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' - -# Collect all arguments for the java command: -# * DEFAULT_JVM_OPTS, JAVA_OPTS, JAVA_OPTS, and optsEnvironmentVar are not allowed to contain shell fragments, -# and any embedded shellness will be escaped. -# * For example: A user cannot expect ${Hostname} to be expanded, as it is an environment variable and will be -# treated as '${Hostname}' itself on the command line. - -set -- \ - "-Dorg.gradle.appname=$APP_BASE_NAME" \ - -classpath "$CLASSPATH" \ - org.gradle.wrapper.GradleWrapperMain \ - "$@" - -# Stop when "xargs" is not available. -if ! command -v xargs >/dev/null 2>&1 -then - die "xargs is not available" -fi - -# Use "xargs" to parse quoted args. -# -# With -n1 it outputs one arg per line, with the quotes and backslashes removed. -# -# In Bash we could simply go: -# -# readarray ARGS < <( xargs -n1 <<<"$var" ) && -# set -- "${ARGS[@]}" "$@" -# -# but POSIX shell has neither arrays nor command substitution, so instead we -# post-process each arg (as a line of input to sed) to backslash-escape any -# character that might be a shell metacharacter, then use eval to reverse -# that process (while maintaining the separation between arguments), and wrap -# the whole thing up as a single "set" statement. -# -# This will of course break if any of these variables contains a newline or -# an unmatched quote. -# - -eval "set -- $( - printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" | - xargs -n1 | - sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' | - tr '\n' ' ' - )" '"$@"' - -exec "$JAVACMD" "$@" diff --git a/gradlew.bat b/gradlew.bat deleted file mode 100644 index 93e3f59..0000000 --- a/gradlew.bat +++ /dev/null @@ -1,92 +0,0 @@ -@rem -@rem Copyright 2015 the original author or authors. -@rem -@rem Licensed under the Apache License, Version 2.0 (the "License"); -@rem you may not use this file except in compliance with the License. 
-@rem You may obtain a copy of the License at -@rem -@rem https://www.apache.org/licenses/LICENSE-2.0 -@rem -@rem Unless required by applicable law or agreed to in writing, software -@rem distributed under the License is distributed on an "AS IS" BASIS, -@rem WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -@rem See the License for the specific language governing permissions and -@rem limitations under the License. -@rem - -@if "%DEBUG%"=="" @echo off -@rem ########################################################################## -@rem -@rem Gradle startup script for Windows -@rem -@rem ########################################################################## - -@rem Set local scope for the variables with windows NT shell -if "%OS%"=="Windows_NT" setlocal - -set DIRNAME=%~dp0 -if "%DIRNAME%"=="" set DIRNAME=. -@rem This is normally unused -set APP_BASE_NAME=%~n0 -set APP_HOME=%DIRNAME% - -@rem Resolve any "." and ".." in APP_HOME to make it shorter. -for %%i in ("%APP_HOME%") do set APP_HOME=%%~fi - -@rem Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. -set DEFAULT_JVM_OPTS="-Xmx64m" "-Xms64m" - -@rem Find java.exe -if defined JAVA_HOME goto findJavaFromJavaHome - -set JAVA_EXE=java.exe -%JAVA_EXE% -version >NUL 2>&1 -if %ERRORLEVEL% equ 0 goto execute - -echo. -echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. - -goto fail - -:findJavaFromJavaHome -set JAVA_HOME=%JAVA_HOME:"=% -set JAVA_EXE=%JAVA_HOME%/bin/java.exe - -if exist "%JAVA_EXE%" goto execute - -echo. -echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. 
// TaskTracker accumulates worker results per task ID. The methods defined on
// it lock mu before touching tasks.
type TaskTracker struct {
	mu    sync.Mutex
	tasks map[string]*TaskData // keyed by task ID
	// expectedWorkers is set to 4 by NewTaskTracker.
	// NOTE(review): no reader of this field is visible in the aggregator
	// code; the per-task TaskData.ExpectedWorkers is what AddResult uses.
	expectedWorkers int
}

// TaskData holds everything received so far for one task (one input file).
// All per-section maps are keyed by section ID.
type TaskData struct {
	FileName            string
	TotalSections       int
	WordCountData       map[int]common.WordCountResult
	TopNData            map[int]common.TopNWordsResult
	SentenceSortData    map[int]common.SentenceSortResult
	SentimentData       map[int]common.SentimentResult
	NameReplacementData map[int]common.NameReplacementResult
	// ReceivedCount is incremented once per newly stored (section, worker
	// type) result and compared against TotalSections * ExpectedWorkers.
	ReceivedCount int
	// ExpectedWorkers is 4, or 5 when the task carries a name replacement.
	ExpectedWorkers int
	NameReplacement *common.NameReplacement
	// The timestamps below are filled in from task metadata once the task is
	// complete, except AllResultsReceived, which AddResult stamps itself.
	FileDetectionTime    time.Time
	FirstSectionCreated  time.Time
	LastSectionPublished time.Time
	AllResultsReceived   time.Time
	SectionTimings       map[int]*SectionTimingData
	// TotalTextSize is the sum of len(Text) over the task's sections.
	TotalTextSize int
}

// SectionTimingData records when one section was created and published and
// how each worker type handled it.
type SectionTimingData struct {
	SectionID        int
	CreationTime     time.Time
	PublishTime      time.Time
	WorkerTimings    map[string]*WorkerTimingData // keyed by worker type
	FirstWorkerStart time.Time                    // earliest processing start seen
	LastWorkerEnd    time.Time                    // latest processing end seen
}

// WorkerTimingData is the timing a single worker reported for one section.
type WorkerTimingData struct {
	WorkerType       string
	ReceiveTime      time.Time
	StartTime        time.Time
	EndTime          time.Time
	ProcessingTimeMs int64
}
StartTime time.Time + EndTime time.Time + ProcessingTimeMs int64 +} + +func NewTaskTracker() *TaskTracker { + return &TaskTracker{ + tasks: make(map[string]*TaskData), + expectedWorkers: 4, + } +} + +func (tt *TaskTracker) AddResult(result common.ResultMessage, totalSections int, fileName string, nameReplacement *common.NameReplacement) bool { + tt.mu.Lock() + defer tt.mu.Unlock() + + if _, exists := tt.tasks[result.TaskID]; !exists { + expectedWorkers := 4 + if nameReplacement != nil { + expectedWorkers = 5 + } + + tt.tasks[result.TaskID] = &TaskData{ + FileName: fileName, + TotalSections: totalSections, + WordCountData: make(map[int]common.WordCountResult), + TopNData: make(map[int]common.TopNWordsResult), + SentenceSortData: make(map[int]common.SentenceSortResult), + SentimentData: make(map[int]common.SentimentResult), + NameReplacementData: make(map[int]common.NameReplacementResult), + ExpectedWorkers: expectedWorkers, + NameReplacement: nameReplacement, + SectionTimings: make(map[int]*SectionTimingData), + } + } + + task := tt.tasks[result.TaskID] + + if _, exists := task.SectionTimings[result.SectionID]; !exists { + task.SectionTimings[result.SectionID] = &SectionTimingData{ + SectionID: result.SectionID, + WorkerTimings: make(map[string]*WorkerTimingData), + } + } + + sectionTiming := task.SectionTimings[result.SectionID] + if _, exists := sectionTiming.WorkerTimings[result.WorkerType]; !exists { + sectionTiming.WorkerTimings[result.WorkerType] = &WorkerTimingData{ + WorkerType: result.WorkerType, + ReceiveTime: result.WorkerReceiveTime, + StartTime: result.ProcessingStartTime, + EndTime: result.ProcessingEndTime, + ProcessingTimeMs: result.ProcessingDurationMs, + } + + if sectionTiming.FirstWorkerStart.IsZero() || result.ProcessingStartTime.Before(sectionTiming.FirstWorkerStart) { + sectionTiming.FirstWorkerStart = result.ProcessingStartTime + } + if result.ProcessingEndTime.After(sectionTiming.LastWorkerEnd) { + sectionTiming.LastWorkerEnd = 
result.ProcessingEndTime + } + } + + resultBytes, _ := json.Marshal(result.Result) + + switch result.WorkerType { + case "wordcount": + var wcResult common.WordCountResult + json.Unmarshal(resultBytes, &wcResult) + if _, exists := task.WordCountData[result.SectionID]; !exists { + task.WordCountData[result.SectionID] = wcResult + task.ReceivedCount++ + } + case "topn": + var topnResult common.TopNWordsResult + json.Unmarshal(resultBytes, &topnResult) + if _, exists := task.TopNData[result.SectionID]; !exists { + task.TopNData[result.SectionID] = topnResult + task.ReceivedCount++ + } + case "sentencesort": + var ssResult common.SentenceSortResult + json.Unmarshal(resultBytes, &ssResult) + if _, exists := task.SentenceSortData[result.SectionID]; !exists { + task.SentenceSortData[result.SectionID] = ssResult + task.ReceivedCount++ + } + case "sentiment": + var sentResult common.SentimentResult + json.Unmarshal(resultBytes, &sentResult) + if _, exists := task.SentimentData[result.SectionID]; !exists { + task.SentimentData[result.SectionID] = sentResult + task.ReceivedCount++ + } + case "namereplacement": + var nrResult common.NameReplacementResult + json.Unmarshal(resultBytes, &nrResult) + if _, exists := task.NameReplacementData[result.SectionID]; !exists { + task.NameReplacementData[result.SectionID] = nrResult + task.ReceivedCount++ + } + } + + expectedTotal := task.TotalSections * task.ExpectedWorkers + isComplete := task.ReceivedCount >= expectedTotal + if isComplete && task.AllResultsReceived.IsZero() { + task.AllResultsReceived = time.Now() + } + return isComplete +} + +func (tt *TaskTracker) GetTaskData(taskID string) *TaskData { + tt.mu.Lock() + defer tt.mu.Unlock() + return tt.tasks[taskID] +} + +func (tt *TaskTracker) RemoveTask(taskID string) { + tt.mu.Lock() + defer tt.mu.Unlock() + delete(tt.tasks, taskID) +} + +func main() { + log.Println("Starting Aggregator...") + + config, _ := common.LoadConfig("/config/config.json") + + conn, err := 
common.ConnectRabbitMQ(config.RabbitMQ.URL) + if err != nil { + log.Fatalf("Failed to connect to RabbitMQ: %v", err) + } + defer conn.Close() + + ch, err := conn.Channel() + if err != nil { + log.Fatalf("Failed to open channel: %v", err) + } + defer ch.Close() + + err = common.SetupExchange(ch, config.RabbitMQ.ExchangeName, config.RabbitMQ.ExchangeType) + if err != nil { + log.Fatalf("Failed to setup exchange: %v", err) + } + + queueName := "queue_results" + routingKey := config.RabbitMQ.ResultsRoutingKey + + queue, err := common.DeclareQueue(ch, queueName) + if err != nil { + log.Fatalf("Failed to declare queue: %v", err) + } + + err = common.BindQueue(ch, queue.Name, routingKey, config.RabbitMQ.ExchangeName) + if err != nil { + log.Fatalf("Failed to bind queue: %v", err) + } + + msgs, err := ch.Consume( + queue.Name, + "", + false, + false, + false, + false, + nil, + ) + if err != nil { + log.Fatalf("Failed to register consumer: %v", err) + } + + tracker := NewTaskTracker() + taskMetadata := make(map[string]struct { + TotalCount int + FileName string + NameReplacement *common.NameReplacement + FileDetectionTime time.Time + SectionCreationTimes map[int]time.Time + SectionPublishTimes map[int]time.Time + TextSizes map[int]int + }) + metaMu := sync.Mutex{} + + log.Println("Aggregator waiting for results...") + + forever := make(chan bool) + + go func() { + for msg := range msgs { + var result common.ResultMessage + err := json.Unmarshal(msg.Body, &result) + if err != nil { + log.Printf("Failed to unmarshal result: %v", err) + msg.Nack(false, false) + continue + } + + metaMu.Lock() + meta, exists := taskMetadata[result.TaskID] + if !exists { + metaMu.Unlock() + msg.Ack(false) + continue + } + metaMu.Unlock() + + log.Printf("Received result from %s for task %s, section %d", + result.WorkerType, result.TaskID, result.SectionID) + + isComplete := tracker.AddResult(result, meta.TotalCount, meta.FileName, meta.NameReplacement) + + msg.Ack(false) + + if isComplete { + 
log.Printf("Task %s is complete, aggregating results...", result.TaskID) + taskData := tracker.GetTaskData(result.TaskID) + + metaMu.Lock() + if meta, exists := taskMetadata[result.TaskID]; exists { + taskData.FileDetectionTime = meta.FileDetectionTime + for sectionID := 0; sectionID < meta.TotalCount; sectionID++ { + if creationTime, ok := meta.SectionCreationTimes[sectionID]; ok { + if taskData.FirstSectionCreated.IsZero() || creationTime.Before(taskData.FirstSectionCreated) { + taskData.FirstSectionCreated = creationTime + } + } + if publishTime, ok := meta.SectionPublishTimes[sectionID]; ok { + if publishTime.After(taskData.LastSectionPublished) { + taskData.LastSectionPublished = publishTime + } + } + if size, ok := meta.TextSizes[sectionID]; ok { + taskData.TotalTextSize += size + } + if sectionTiming, ok := taskData.SectionTimings[sectionID]; ok { + if creationTime, ok := meta.SectionCreationTimes[sectionID]; ok { + sectionTiming.CreationTime = creationTime + } + if publishTime, ok := meta.SectionPublishTimes[sectionID]; ok { + sectionTiming.PublishTime = publishTime + } + } + } + delete(taskMetadata, result.TaskID) + } + metaMu.Unlock() + + aggregateAndSave(taskData, result.TaskID, config) + tracker.RemoveTask(result.TaskID) + } + } + }() + + go func() { + ch2, _ := conn.Channel() + defer ch2.Close() + + common.SetupExchange(ch2, config.RabbitMQ.ExchangeName, config.RabbitMQ.ExchangeType) + + taskQueue, _ := common.DeclareQueue(ch2, "queue_task_metadata") + ch2.QueueBind(taskQueue.Name, "task.*", config.RabbitMQ.ExchangeName, false, nil) + + taskMsgs, _ := ch2.Consume(taskQueue.Name, "", true, false, false, false, nil) + + for msg := range taskMsgs { + var task common.TaskMessage + json.Unmarshal(msg.Body, &task) + + metaMu.Lock() + if _, exists := taskMetadata[task.TaskID]; !exists { + taskMetadata[task.TaskID] = struct { + TotalCount int + FileName string + NameReplacement *common.NameReplacement + FileDetectionTime time.Time + SectionCreationTimes 
map[int]time.Time + SectionPublishTimes map[int]time.Time + TextSizes map[int]int + }{ + TotalCount: task.TotalCount, + FileName: task.FileName, + NameReplacement: task.NameReplacement, + FileDetectionTime: task.FileDetectionTime, + SectionCreationTimes: make(map[int]time.Time), + SectionPublishTimes: make(map[int]time.Time), + TextSizes: make(map[int]int), + } + } + meta := taskMetadata[task.TaskID] + meta.SectionCreationTimes[task.SectionID] = task.SectionCreationTime + meta.SectionPublishTimes[task.SectionID] = task.SectionPublishTime + meta.TextSizes[task.SectionID] = len(task.Text) + taskMetadata[task.TaskID] = meta + metaMu.Unlock() + } + }() + + <-forever +} + +func aggregateAndSave(taskData *TaskData, taskID string, config *common.Config) { + var sectionIDs []int + for sectionID := range taskData.WordCountData { + sectionIDs = append(sectionIDs, sectionID) + } + sort.Ints(sectionIDs) + + totalWords := 0 + for _, sectionID := range sectionIDs { + totalWords += taskData.WordCountData[sectionID].TotalWords + } + + wordFreqMap := make(map[string]int) + for _, sectionID := range sectionIDs { + for word, count := range taskData.TopNData[sectionID].Words { + wordFreqMap[word] += count + } + } + + topNWords := getTopNWords(wordFreqMap, config.Processing.TopNWords) + + var sortedSentences []common.SentenceInfo + for _, sectionID := range sectionIDs { + sortedSentences = append(sortedSentences, taskData.SentenceSortData[sectionID].Sentences...) 
+ } + sort.Slice(sortedSentences, func(i, j int) bool { + return sortedSentences[i].Length < sortedSentences[j].Length + }) + + var avgScore float64 + sentimentCounts := make(map[string]int) + for _, sectionID := range sectionIDs { + sentiment := taskData.SentimentData[sectionID] + avgScore += sentiment.Score + sentimentCounts[sentiment.Sentiment]++ + } + avgScore /= float64(len(sectionIDs)) + + dominantSentiment := "neutral" + maxCount := 0 + for sentiment, count := range sentimentCounts { + if count > maxCount { + maxCount = count + dominantSentiment = sentiment + } + } + + outputGenerated := time.Now() + perfMetrics := calculatePerformanceMetrics(taskData, outputGenerated) + + result := common.AggregatedResult{ + FileName: taskData.FileName, + TotalWords: totalWords, + TopNWords: topNWords, + SortedSentences: sortedSentences, + Sentiment: common.SentimentResult{ + Sentiment: dominantSentiment, + Score: avgScore, + }, + NameReplaced: taskData.NameReplacement != nil, + Performance: perfMetrics, + } + + if taskData.NameReplacement != nil { + result.SourceName = taskData.NameReplacement.SourceName + result.TargetName = taskData.NameReplacement.TargetName + } + + baseOutputPath := fmt.Sprintf("%s/%s", config.Output.ResultsDir, + taskData.FileName[:len(taskData.FileName)-4]) + + jsonPath := baseOutputPath + ".json" + file, err := os.Create(jsonPath) + if err != nil { + log.Printf("Failed to create JSON output file: %v", err) + return + } + defer file.Close() + + encoder := json.NewEncoder(file) + encoder.SetIndent("", " ") + err = encoder.Encode(result) + if err != nil { + log.Printf("Failed to write JSON output: %v", err) + return + } + + log.Printf("Saved aggregated results to %s", jsonPath) + + if taskData.NameReplacement != nil && len(taskData.NameReplacementData) > 0 { + var modifiedTextParts []string + for _, sectionID := range sectionIDs { + if nrData, exists := taskData.NameReplacementData[sectionID]; exists { + modifiedTextParts = append(modifiedTextParts, 
nrData.ModifiedText) + } + } + + modifiedText := strings.Join(modifiedTextParts, "") + + txtPath := baseOutputPath + ".txt" + err = os.WriteFile(txtPath, []byte(modifiedText), 0644) + if err != nil { + log.Printf("Failed to write modified text file: %v", err) + return + } + + log.Printf("Saved modified text to %s", txtPath) + } +} + +func getTopNWords(wordFreq map[string]int, n int) []common.WordFrequency { + var words []common.WordFrequency + for word, count := range wordFreq { + words = append(words, common.WordFrequency{Word: word, Count: count}) + } + + sort.Slice(words, func(i, j int) bool { + if words[i].Count == words[j].Count { + return words[i].Word < words[j].Word + } + return words[i].Count > words[j].Count + }) + + if len(words) > n { + words = words[:n] + } + + return words +} + +func calculatePerformanceMetrics(taskData *TaskData, outputGenerated time.Time) *common.PerformanceMetrics { + if taskData.FileDetectionTime.IsZero() { + return nil + } + + totalProcessingTime := outputGenerated.Sub(taskData.FileDetectionTime).Milliseconds() + fileSplittingTime := taskData.LastSectionPublished.Sub(taskData.FileDetectionTime).Milliseconds() + workersProcessingTime := taskData.AllResultsReceived.Sub(taskData.LastSectionPublished).Milliseconds() + aggregationWaitTime := outputGenerated.Sub(taskData.AllResultsReceived).Milliseconds() + + var sectionJourneys []common.SectionJourney + var slowestSectionID int + var slowestSectionTime int64 + + for sectionID := 0; sectionID < taskData.TotalSections; sectionID++ { + if sectionTiming, exists := taskData.SectionTimings[sectionID]; exists { + workerTimings := make(map[string]*common.WorkerTiming) + + for workerType, workerData := range sectionTiming.WorkerTimings { + queueWaitTime := workerData.StartTime.Sub(sectionTiming.PublishTime).Milliseconds() + if queueWaitTime < 0 { + queueWaitTime = 0 + } + + workerTimings[workerType] = &common.WorkerTiming{ + WorkerType: workerType, + ReceiveTime: workerData.ReceiveTime, + 
StartTime: workerData.StartTime, + EndTime: workerData.EndTime, + QueueWaitTimeMs: queueWaitTime, + ProcessingTimeMs: workerData.ProcessingTimeMs, + } + } + + journeyTime := sectionTiming.LastWorkerEnd.Sub(sectionTiming.CreationTime).Milliseconds() + if journeyTime > slowestSectionTime { + slowestSectionTime = journeyTime + slowestSectionID = sectionID + } + + sectionJourneys = append(sectionJourneys, common.SectionJourney{ + SectionID: sectionID, + CreationTime: sectionTiming.CreationTime, + PublishTime: sectionTiming.PublishTime, + FirstWorkerStart: sectionTiming.FirstWorkerStart, + LastWorkerEnd: sectionTiming.LastWorkerEnd, + TotalJourneyTimeMs: journeyTime, + WorkerTimings: workerTimings, + }) + } + } + + workerStats := make(map[string]*common.WorkerStats) + workerProcessingTimes := make(map[string][]int64) + workerQueueWaitTimes := make(map[string][]int64) + + for _, sectionTiming := range taskData.SectionTimings { + for workerType, workerData := range sectionTiming.WorkerTimings { + workerProcessingTimes[workerType] = append(workerProcessingTimes[workerType], workerData.ProcessingTimeMs) + + queueWaitTime := workerData.StartTime.Sub(sectionTiming.PublishTime).Milliseconds() + if queueWaitTime < 0 { + queueWaitTime = 0 + } + workerQueueWaitTimes[workerType] = append(workerQueueWaitTimes[workerType], queueWaitTime) + } + } + + for workerType, processingTimes := range workerProcessingTimes { + if len(processingTimes) == 0 { + continue + } + + sort.Slice(processingTimes, func(i, j int) bool { + return processingTimes[i] < processingTimes[j] + }) + + var total int64 + for _, t := range processingTimes { + total += t + } + + var totalQueueWait int64 + queueWaitTimes := workerQueueWaitTimes[workerType] + for _, t := range queueWaitTimes { + totalQueueWait += t + } + + workerStats[workerType] = &common.WorkerStats{ + WorkerType: workerType, + TotalSections: len(processingTimes), + TotalProcessingMs: total, + MinProcessingMs: processingTimes[0], + MaxProcessingMs: 
processingTimes[len(processingTimes)-1], + AvgProcessingMs: total / int64(len(processingTimes)), + MedianProcessingMs: processingTimes[len(processingTimes)/2], + TotalQueueWaitMs: totalQueueWait, + AvgQueueWaitMs: totalQueueWait / int64(len(queueWaitTimes)), + } + } + + avgSectionSize := 0 + if taskData.TotalSections > 0 { + avgSectionSize = taskData.TotalTextSize / taskData.TotalSections + } + + return &common.PerformanceMetrics{ + FileDetectionTime: taskData.FileDetectionTime, + FirstSectionCreated: taskData.FirstSectionCreated, + LastSectionPublished: taskData.LastSectionPublished, + AllResultsReceived: taskData.AllResultsReceived, + OutputGenerated: outputGenerated, + TotalProcessingTimeMs: totalProcessingTime, + FileSplittingTimeMs: fileSplittingTime, + WorkersProcessingTimeMs: workersProcessingTime, + AggregationWaitTimeMs: aggregationWaitTime, + SectionJourneys: sectionJourneys, + WorkerStats: workerStats, + TotalSections: taskData.TotalSections, + AverageSectionSize: avgSectionSize, + SlowestSection: slowestSectionID, + SlowestSectionTime: slowestSectionTime, + } +} diff --git a/src/common/rabbitmq.go b/src/common/rabbitmq.go new file mode 100644 index 0000000..b654f24 --- /dev/null +++ b/src/common/rabbitmq.go @@ -0,0 +1,94 @@ +package common + +import ( + "encoding/json" + "fmt" + "log" + "os" + "time" + + amqp "github.com/rabbitmq/amqp091-go" +) + +func LoadConfig(path string) (*Config, error) { + data, err := os.ReadFile(path) + if err != nil { + return nil, err + } + + var config Config + err = json.Unmarshal(data, &config) + if err != nil { + return nil, err + } + + return &config, nil +} + +func ConnectRabbitMQ(url string) (*amqp.Connection, error) { + var conn *amqp.Connection + var err error + + for i := 0; i < 30; i++ { + conn, err = amqp.Dial(url) + if err == nil { + return conn, nil + } + log.Printf("Failed to connect to RabbitMQ, retrying... 
(%d/30)", i+1) + time.Sleep(2 * time.Second) + } + + return nil, fmt.Errorf("failed to connect to RabbitMQ after 30 attempts: %v", err) +} + +func SetupExchange(ch *amqp.Channel, exchangeName, exchangeType string) error { + return ch.ExchangeDeclare( + exchangeName, + exchangeType, + true, // durable + false, // auto-deleted + false, // internal + false, // no-wait + nil, // arguments + ) +} + +func DeclareQueue(ch *amqp.Channel, queueName string) (amqp.Queue, error) { + return ch.QueueDeclare( + queueName, + true, // durable + false, // delete when unused + false, // exclusive + false, // no-wait + nil, // arguments + ) +} + +func BindQueue(ch *amqp.Channel, queueName, routingKey, exchangeName string) error { + return ch.QueueBind( + queueName, + routingKey, + exchangeName, + false, + nil, + ) +} + +func PublishMessage(ch *amqp.Channel, exchangeName, routingKey string, message interface{}) error { + body, err := json.Marshal(message) + if err != nil { + return err + } + + return ch.Publish( + exchangeName, + routingKey, + false, // mandatory + false, // immediate + amqp.Publishing{ + ContentType: "application/json", + Body: body, + DeliveryMode: amqp.Persistent, + }, + ) +} diff --git a/src/common/types.go b/src/common/types.go new file mode 100644 index 0000000..e815fc4 --- /dev/null +++ b/src/common/types.go @@ -0,0 +1,154 @@ +package common + +import "time" + +type Config struct { + RabbitMQ struct { + URL string `json:"url"` + ExchangeName string `json:"exchange_name"` + ExchangeType string `json:"exchange_type"` + TasksRoutingKeyPrefix string `json:"tasks_routing_key_prefix"` + ResultsRoutingKey string `json:"results_routing_key"` + } `json:"rabbitmq"` + Workers struct { + WordCountWorkers int `json:"word_count_workers"` + TopNWorkers int `json:"top_n_workers"` + SentenceSortWorkers int `json:"sentence_sort_workers"` + SentimentWorkers int `json:"sentiment_workers"` + NameReplacementWorkers int `json:"name_replacement_workers"` + } `json:"workers"` + 
// Config mirrors the JSON configuration file read by LoadConfig.
type Config struct {
	// RabbitMQ holds the broker URL, the exchange to declare and the routing
	// keys used for task and result messages.
	RabbitMQ struct {
		URL                   string `json:"url"`
		ExchangeName          string `json:"exchange_name"`
		ExchangeType          string `json:"exchange_type"`
		TasksRoutingKeyPrefix string `json:"tasks_routing_key_prefix"`
		ResultsRoutingKey     string `json:"results_routing_key"`
	} `json:"rabbitmq"`
	// Workers holds one count per worker type.
	// NOTE(review): presumably the number of instances to run; no reader is
	// visible here — confirm.
	Workers struct {
		WordCountWorkers       int `json:"word_count_workers"`
		TopNWorkers            int `json:"top_n_workers"`
		SentenceSortWorkers    int `json:"sentence_sort_workers"`
		SentimentWorkers       int `json:"sentiment_workers"`
		NameReplacementWorkers int `json:"name_replacement_workers"`
	} `json:"workers"`
	Processing struct {
		// TopNWords is the number of words the aggregator keeps in the
		// final top-words list.
		TopNWords        int `json:"top_n_words"`
		SectionSizeChars int `json:"section_size_chars"`
	} `json:"processing"`
	Producer struct {
		MonitorIntervalSeconds int `json:"monitor_interval_seconds"`
		FileReadyCheckDelayMs  int `json:"file_ready_check_delay_ms"`
	} `json:"producer"`
	Input struct {
		DataDir string `json:"data_dir"`
	} `json:"input"`
	Output struct {
		// ResultsDir is the directory the aggregator writes its output
		// files into.
		ResultsDir string `json:"results_dir"`
	} `json:"output"`
}

// NameReplacement names the source/target pair of a requested name
// replacement.
type NameReplacement struct {
	SourceName string `json:"source_name"`
	TargetName string `json:"target_name"`
}

// TaskMessage describes one section of an input file. The aggregator reads
// TotalCount as the number of sections in the task.
type TaskMessage struct {
	TaskID              string           `json:"task_id"`
	SectionID           int              `json:"section_id"`
	TotalCount          int              `json:"total_count"`
	Text                string           `json:"text"`
	FileName            string           `json:"file_name"`
	NameReplacement     *NameReplacement `json:"name_replacement,omitempty"`
	FileDetectionTime   time.Time        `json:"file_detection_time"`
	SectionCreationTime time.Time        `json:"section_creation_time"`
	SectionPublishTime  time.Time        `json:"section_publish_time"`
}

// ResultMessage carries one worker's result for one section, together with
// the timings the worker reported.
type ResultMessage struct {
	TaskID     string `json:"task_id"`
	SectionID  int    `json:"section_id"`
	WorkerType string `json:"worker_type"`
	// Result is decoded by the aggregator into the *Result type matching
	// WorkerType.
	Result               interface{} `json:"result"`
	WorkerReceiveTime    time.Time   `json:"worker_receive_time"`
	ProcessingStartTime  time.Time   `json:"processing_start_time"`
	ProcessingEndTime    time.Time   `json:"processing_end_time"`
	ProcessingDurationMs int64       `json:"processing_duration_ms"`
}

// WordCountResult is the payload of a "wordcount" result.
type WordCountResult struct {
	TotalWords int `json:"total_words"`
}

// TopNWordsResult is the payload of a "topn" result: a count per word.
type TopNWordsResult struct {
	Words map[string]int `json:"words"`
}

// SentenceSortResult is the payload of a "sentencesort" result.
type SentenceSortResult struct {
	Sentences []SentenceInfo `json:"sentences"`
}

// SentenceInfo is a sentence with the length it is sorted by.
type SentenceInfo struct {
	Text   string `json:"text"`
	Length int    `json:"length"`
}

// SentimentResult is the payload of a "sentiment" result; the aggregator also
// uses it for the file-level label and average score.
type SentimentResult struct {
	Sentiment string  `json:"sentiment"`
	Score     float64 `json:"score"`
}

// NameReplacementResult is the payload of a "namereplacement" result.
type NameReplacementResult struct {
	ModifiedText string `json:"modified_text"`
}

// AggregatedResult is the per-file report the aggregator writes as JSON.
type AggregatedResult struct {
	FileName        string              `json:"file_name"`
	TotalWords      int                 `json:"total_words"`
	TopNWords       []WordFrequency     `json:"top_n_words"`
	SortedSentences []SentenceInfo      `json:"sorted_sentences"`
	Sentiment       SentimentResult     `json:"sentiment"`
	NameReplaced    bool                `json:"name_replaced"`
	SourceName      string              `json:"source_name,omitempty"`
	TargetName      string              `json:"target_name,omitempty"`
	Performance     *PerformanceMetrics `json:"performance,omitempty"`
}

// WordFrequency pairs a word with its occurrence count.
type WordFrequency struct {
	Word  string `json:"word"`
	Count int    `json:"count"`
}

// PerformanceMetrics is the timing section of an AggregatedResult. Fields
// ending in Ms are durations in milliseconds.
type PerformanceMetrics struct {
	FileDetectionTime       time.Time               `json:"file_detection_time"`
	FirstSectionCreated     time.Time               `json:"first_section_created"`
	LastSectionPublished    time.Time               `json:"last_section_published"`
	AllResultsReceived      time.Time               `json:"all_results_received"`
	OutputGenerated         time.Time               `json:"output_generated"`
	TotalProcessingTimeMs   int64                   `json:"total_processing_time_ms"`
	FileSplittingTimeMs     int64                   `json:"file_splitting_time_ms"`
	WorkersProcessingTimeMs int64                   `json:"workers_processing_time_ms"`
	AggregationWaitTimeMs   int64                   `json:"aggregation_wait_time_ms"`
	SectionJourneys         []SectionJourney        `json:"section_journeys"`
	WorkerStats             map[string]*WorkerStats `json:"worker_stats"` // keyed by worker type
	TotalSections           int                     `json:"total_sections"`
	AverageSectionSize      int                     `json:"average_section_size"`
	SlowestSection          int                     `json:"slowest_section_id"`
	SlowestSectionTime      int64                   `json:"slowest_section_time_ms"`
}

// SectionJourney traces one section from creation to the end of the last
// worker that processed it.
type SectionJourney struct {
	SectionID          int                      `json:"section_id"`
	CreationTime       time.Time                `json:"creation_time"`
	PublishTime        time.Time                `json:"publish_time"`
	FirstWorkerStart   time.Time                `json:"first_worker_start"`
	LastWorkerEnd      time.Time                `json:"last_worker_end"`
	TotalJourneyTimeMs int64                    `json:"total_journey_time_ms"`
	WorkerTimings      map[string]*WorkerTiming `json:"worker_timings"` // keyed by worker type
}

// WorkerTiming is one worker's timing for one section as it appears in the
// report.
type WorkerTiming struct {
	WorkerType       string    `json:"worker_type"`
	ReceiveTime      time.Time `json:"receive_time"`
	StartTime        time.Time `json:"start_time"`
	EndTime          time.Time `json:"end_time"`
	QueueWaitTimeMs  int64     `json:"queue_wait_time_ms"`
	ProcessingTimeMs int64     `json:"processing_time_ms"`
}

// WorkerStats summarises one worker type across the sections it processed.
type WorkerStats struct {
	WorkerType         string `json:"worker_type"`
	TotalSections      int    `json:"total_sections"`
	TotalProcessingMs  int64  `json:"total_processing_ms"`
	MinProcessingMs    int64  `json:"min_processing_ms"`
	MaxProcessingMs    int64  `json:"max_processing_ms"`
	AvgProcessingMs    int64  `json:"avg_processing_ms"`
	MedianProcessingMs int64  `json:"median_processing_ms"`
	TotalQueueWaitMs   int64  `json:"total_queue_wait_ms"`
	AvgQueueWaitMs     int64  `json:"avg_queue_wait_ms"`
}
+github.com/rabbitmq/amqp091-go v1.9.0 h1:qrQtyzB4H8BQgEuJwhmVQqVHB9O4+MNDJCCAcpc3Aoo= +github.com/rabbitmq/amqp091-go v1.9.0/go.mod h1:+jPrT9iY2eLjRaMSRHUhc3z14E/l85kv/f+6luSD3pc= +github.com/stretchr/testify v1.8.4 h1:CcVxjf3Q8PM0mHUKJCdn+eZZtm5yQwehR5yeSVQQcUk= +github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= +go.uber.org/goleak v1.2.1 h1:NBol2c7O1ZokfZ0LEU9K6Whx/KnwvepVetCUhtKja4A= +gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15 h1:YR8cESwS4TdDjEe65xsg0ogRM/Nc3DYOhEAlW+xobZo= +gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= +gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/src/main/java/Main.java b/src/main/java/Main.java deleted file mode 100644 index 477e8b9..0000000 --- a/src/main/java/Main.java +++ /dev/null @@ -1,5 +0,0 @@ -public class Main { - public static void main(String[] args) { - System.out.println("Lab 3"); - } -} diff --git a/src/producer/main.go b/src/producer/main.go new file mode 100644 index 0000000..83b3523 --- /dev/null +++ b/src/producer/main.go @@ -0,0 +1,231 @@ +package main + +import ( + "fmt" + "io/ioutil" + "lab2/common" + "log" + "os" + "path/filepath" + "strings" + "time" + + "github.com/google/uuid" + amqp "github.com/rabbitmq/amqp091-go" +) + +func main() { + log.Println("Starting Producer with file monitoring...") + + config, _ := common.LoadConfig("/config/config.json") + + conn, _ := common.ConnectRabbitMQ(config.RabbitMQ.URL) + defer conn.Close() + + ch, _ := conn.Channel() + defer ch.Close() + + err := common.SetupExchange(ch, config.RabbitMQ.ExchangeName, config.RabbitMQ.ExchangeType) + if err != nil { + log.Fatalf("Failed to setup exchange: %v", err) + } + + processedFiles := make(map[string]bool) + + log.Printf("Monitoring directory: %s", config.Input.DataDir) + log.Printf("Check interval: %d seconds", 
// checkOutputExists reports whether the JSON result for fileName is already
// present in outputDir. The result path is outputDir joined with fileName
// whose extension has been replaced by ".json" (for example "notes.txt" maps
// to "notes.json").
//
// The extension is removed with filepath.Ext instead of slicing off a fixed
// four bytes, so a name shorter than four bytes no longer panics and a name
// without an extension is used unchanged.
func checkOutputExists(outputDir, fileName string) bool {
	baseName := strings.TrimSuffix(fileName, filepath.Ext(fileName))
	jsonPath := filepath.Join(outputDir, baseName+".json")

	// Any Stat failure, not only "does not exist", counts as "no output yet".
	_, err := os.Stat(jsonPath)
	return err == nil
}

// isFileReady reports whether filePath looks fully written: it stats the
// file, sleeps for delayMs milliseconds, stats it again and returns true only
// when both size and modification time are unchanged. It returns false when
// either Stat call fails.
func isFileReady(filePath string, delayMs int) bool {
	before, err := os.Stat(filePath)
	if err != nil {
		return false
	}

	time.Sleep(time.Duration(delayMs) * time.Millisecond)

	after, err := os.Stat(filePath)
	if err != nil {
		return false
	}

	// time.Time values are compared with Equal; == also compares the
	// location pointer and monotonic reading, which is not what is meant.
	return before.Size() == after.Size() && before.ModTime().Equal(after.ModTime())
}
fileDetectionTime time.Time) { + log.Printf("Processing file: %s", fileName) + + content, err := ioutil.ReadFile(filePath) + if err != nil { + log.Printf("Failed to read file %s: %v", fileName, err) + return + } + + text := string(content) + + nameReplacement, cleanText := parseNameReplacementHeader(text) + if nameReplacement != nil { + log.Printf("Found name replacement: %s -> %s", nameReplacement.SourceName, nameReplacement.TargetName) + text = cleanText + } + + sections := splitText(text, config.Processing.SectionSizeChars) + + taskID := uuid.New().String() + log.Printf("Task ID for %s: %s, sections: %d", fileName, taskID, len(sections)) + + for i, section := range sections { + sectionCreationTime := time.Now() + + task := common.TaskMessage{ + TaskID: taskID, + SectionID: i, + TotalCount: len(sections), + Text: section, + FileName: fileName, + NameReplacement: nameReplacement, + FileDetectionTime: fileDetectionTime, + SectionCreationTime: sectionCreationTime, + } + + routingKeys := []string{ + fmt.Sprintf("%s.wordcount", config.RabbitMQ.TasksRoutingKeyPrefix), + fmt.Sprintf("%s.topn", config.RabbitMQ.TasksRoutingKeyPrefix), + fmt.Sprintf("%s.sentencesort", config.RabbitMQ.TasksRoutingKeyPrefix), + fmt.Sprintf("%s.sentiment", config.RabbitMQ.TasksRoutingKeyPrefix), + } + + if nameReplacement != nil { + routingKeys = append(routingKeys, fmt.Sprintf("%s.namereplacement", config.RabbitMQ.TasksRoutingKeyPrefix)) + } + + for _, routingKey := range routingKeys { + task.SectionPublishTime = time.Now() + err := common.PublishMessage(ch, config.RabbitMQ.ExchangeName, routingKey, task) + if err != nil { + log.Printf("Failed to publish task to %s: %v", routingKey, err) + } + } + } + + log.Printf("Finished processing file: %s", fileName) +} + +func parseNameReplacementHeader(text string) (*common.NameReplacement, string) { + if !strings.HasPrefix(text, "#####\n") { + return nil, text + } + + endMarker := "\n#####" + endIndex := strings.Index(text[6:], endMarker) + if 
// splitText cuts text into consecutive sections of at most sectionSize bytes
// whose concatenation is exactly text.
//
// When a section would end in the middle of the text, the cut is moved back
// to the last space inside the section, provided that space lies in the
// second half of the section; the space itself then starts the next section.
// Otherwise the cut is moved back to the nearest UTF-8 character boundary so
// that a multi-byte character (for example a Cyrillic letter) is never split
// between two sections.
//
// A text that already fits, or a non-positive sectionSize (which could never
// make progress), yields a single section containing the whole text.
func splitText(text string, sectionSize int) []string {
	if sectionSize <= 0 || len(text) <= sectionSize {
		return []string{text}
	}

	// isContinuation reports whether b is a UTF-8 continuation byte
	// (binary 10xxxxxx), i.e. not the first byte of a character.
	isContinuation := func(b byte) bool { return b&0xC0 == 0x80 }

	var sections []string
	for start := 0; start < len(text); {
		end := start + sectionSize
		if end >= len(text) {
			sections = append(sections, text[start:])
			break
		}

		if lastSpace := strings.LastIndex(text[start:end], " "); lastSpace > sectionSize/2 {
			// Break on the word boundary; the space opens the next section.
			end = start + lastSpace
		} else {
			// Do not cut through a multi-byte character.
			for end > start && isContinuation(text[end]) {
				end--
			}
			if end == start {
				// The first character alone is longer than sectionSize:
				// emit it whole so the loop always advances.
				end = start + 1
				for end < len(text) && isContinuation(text[end]) {
					end++
				}
			}
		}

		sections = append(sections, text[start:end])
		start = end
	}

	return sections
}
common.SetupExchange(ch, config.RabbitMQ.ExchangeName, config.RabbitMQ.ExchangeType) + if err != nil { + log.Fatalf("Failed to setup exchange: %v", err) + } + + queueName := "queue_namereplacement" + routingKey := "task.namereplacement" + + queue, err := common.DeclareQueue(ch, queueName) + if err != nil { + log.Fatalf("Failed to declare queue: %v", err) + } + + err = common.BindQueue(ch, queue.Name, routingKey, config.RabbitMQ.ExchangeName) + if err != nil { + log.Fatalf("Failed to bind queue: %v", err) + } + + msgs, err := ch.Consume( + queue.Name, + "", + false, + false, + false, + false, + nil, + ) + if err != nil { + log.Fatalf("Failed to register consumer: %v", err) + } + + log.Printf("Name Replacement Worker waiting for messages...") + + forever := make(chan bool) + + go func() { + for msg := range msgs { + processMessage(ch, config, msg) + } + }() + + <-forever +} + +func processMessage(ch *amqp.Channel, config *common.Config, delivery amqp.Delivery) { + receiveTime := time.Now() + + var task common.TaskMessage + err := json.Unmarshal(delivery.Body, &task) + if err != nil { + log.Printf("Failed to unmarshal task: %v", err) + delivery.Nack(false, false) + return + } + + log.Printf("Processing task %s, section %d", task.TaskID, task.SectionID) + + if task.NameReplacement == nil { + log.Printf("No name replacement info for task %s", task.TaskID) + delivery.Ack(false) + return + } + + startTime := time.Now() + modifiedText := replaceNames(task.Text, task.NameReplacement.SourceName, task.NameReplacement.TargetName) + endTime := time.Now() + + processingDuration := endTime.Sub(startTime).Milliseconds() + + result := common.ResultMessage{ + TaskID: task.TaskID, + SectionID: task.SectionID, + WorkerType: WorkerType, + WorkerReceiveTime: receiveTime, + ProcessingStartTime: startTime, + ProcessingEndTime: endTime, + ProcessingDurationMs: processingDuration, + Result: common.NameReplacementResult{ + ModifiedText: modifiedText, + }, + } + + err = 
common.PublishMessage(ch, config.RabbitMQ.ExchangeName, config.RabbitMQ.ResultsRoutingKey, result) + if err != nil { + log.Printf("Failed to publish result: %v", err) + delivery.Nack(false, true) + return + } + + delivery.Ack(false) + log.Printf("Completed task %s, section %d, processing time: %dms", + task.TaskID, task.SectionID, processingDuration) +} + +func parseName(fullName string) NameParts { + parts := strings.Fields(fullName) + np := NameParts{} + + if len(parts) >= 1 { + np.LastName = parts[0] + } + if len(parts) >= 2 { + np.FirstName = parts[1] + } + if len(parts) >= 3 { + np.Patronymic = parts[2] + } + + return np +} + +func isCyrillic(s string) bool { + for _, r := range s { + if unicode.In(r, unicode.Cyrillic) { + return true + } + } + return false +} + +func replaceNames(text, sourceName, targetName string) string { + source := parseName(sourceName) + target := parseName(targetName) + + result := text + isRussian := isCyrillic(sourceName) + + patterns := generateNamePatterns(source, target, isRussian) + + for _, pattern := range patterns { + if pattern.sourcePattern != "" && pattern.targetPattern != "" { + result = replaceWithPattern(result, pattern.sourcePattern, pattern.targetPattern) + } else { + result = replacePattern(result, pattern.regex, pattern.replacement) + } + } + + return result +} + +type NamePattern struct { + regex *regexp.Regexp + replacement string + sourcePattern string + targetPattern string +} + +func generateNamePatterns(source, target NameParts, isRussian bool) []NamePattern { + var patterns []NamePattern + + if source.LastName != "" && source.FirstName != "" && source.Patronymic != "" { + if isRussian { + patterns = append(patterns, NamePattern{ + sourcePattern: source.LastName + " " + source.FirstName + " " + source.Patronymic, + targetPattern: target.LastName + " " + target.FirstName + " " + target.Patronymic, + }) + } else { + patterns = append(patterns, NamePattern{ + regex: createCaseInsensitiveRegex(source.LastName + 
`\s+` + source.FirstName + `\s+` + source.Patronymic), + replacement: target.LastName + " " + target.FirstName + " " + target.Patronymic, + }) + } + } + + if source.LastName != "" && source.FirstName != "" { + if isRussian { + patterns = append(patterns, NamePattern{ + sourcePattern: source.LastName + " " + source.FirstName, + targetPattern: target.LastName + " " + target.FirstName, + }) + } else { + patterns = append(patterns, NamePattern{ + regex: createCaseInsensitiveRegex(source.LastName + `\s+` + source.FirstName), + replacement: target.LastName + " " + target.FirstName, + }) + } + } + + if source.FirstName != "" && source.Patronymic != "" { + if isRussian { + patterns = append(patterns, NamePattern{ + sourcePattern: source.FirstName + " " + source.Patronymic, + targetPattern: target.FirstName + " " + target.Patronymic, + }) + } else { + patterns = append(patterns, NamePattern{ + regex: createCaseInsensitiveRegex(source.FirstName + `\s+` + source.Patronymic), + replacement: target.FirstName + " " + target.Patronymic, + }) + } + } + + if source.FirstName != "" { + firstInitial := string([]rune(source.FirstName)[0]) + targetFirstInitial := string([]rune(target.FirstName)[0]) + + patterns = append(patterns, NamePattern{ + regex: createCaseInsensitiveRegex(source.LastName + `\s+` + regexp.QuoteMeta(firstInitial) + `\.`), + replacement: target.LastName + " " + targetFirstInitial + ".", + }) + + if source.Patronymic != "" { + patronymicInitial := string([]rune(source.Patronymic)[0]) + targetPatronymicInitial := string([]rune(target.Patronymic)[0]) + + patterns = append(patterns, NamePattern{ + regex: createCaseInsensitiveRegex(source.LastName + `\s+` + regexp.QuoteMeta(firstInitial) + `\.\s*` + regexp.QuoteMeta(patronymicInitial) + `\.`), + replacement: target.LastName + " " + targetFirstInitial + ". 
" + targetPatronymicInitial + ".", + }) + + patterns = append(patterns, NamePattern{ + regex: createCaseInsensitiveRegex(regexp.QuoteMeta(firstInitial) + `\.\s*` + regexp.QuoteMeta(patronymicInitial) + `\.\s+` + source.LastName), + replacement: targetFirstInitial + ". " + targetPatronymicInitial + ". " + target.LastName, + }) + } + } + + if source.LastName != "" { + if isRussian { + patterns = append(patterns, NamePattern{ + sourcePattern: source.LastName, + targetPattern: target.LastName, + }) + } else { + patterns = append(patterns, NamePattern{ + regex: createCaseInsensitiveRegex(`\b` + regexp.QuoteMeta(source.LastName) + `\b`), + replacement: target.LastName, + }) + } + } + + if isRussian && source.FirstName != "" { + patterns = append(patterns, NamePattern{ + sourcePattern: source.FirstName, + targetPattern: target.FirstName, + }) + } + + if isRussian && source.Patronymic != "" { + patterns = append(patterns, NamePattern{ + sourcePattern: source.Patronymic, + targetPattern: target.Patronymic, + }) + } + + return patterns +} + +func replaceWithPattern(text, sourcePattern, targetPattern string) string { + sourceParts := strings.Fields(sourcePattern) + targetParts := strings.Fields(targetPattern) + + if len(sourceParts) != len(targetParts) { + return text + } + + result := text + + for i := range sourceParts { + result = replaceRussianWord(result, sourceParts[i], targetParts[i]) + } + + return result +} + +func replaceRussianWord(text, source, target string) string { + endings := getRussianCaseEndings(source) + targetEndings := getRussianCaseEndings(target) + + log.Printf("Replacing Russian word: '%s' -> '%s', generated %d forms", source, target, len(endings)) + + for i, ending := range endings { + if i < len(targetEndings) { + // Use Cyrillic-specific word boundaries (negative lookaround) + // Matches word not preceded/followed by Cyrillic letters + pattern := regexp.MustCompile(`(?i)(?:^|[^а-яА-ЯёЁ])` + regexp.QuoteMeta(ending.form) + `(?:$|[^а-яА-ЯёЁ])`) + + 
countBefore := len(pattern.FindAllString(text, -1)) + + text = pattern.ReplaceAllStringFunc(text, func(match string) string { + // Preserve leading/trailing non-Cyrillic characters + prefix := "" + suffix := "" + matchRunes := []rune(match) + + // Check if first character is not Cyrillic + if len(matchRunes) > 0 { + firstChar := matchRunes[0] + if !((firstChar >= 'а' && firstChar <= 'я') || (firstChar >= 'А' && firstChar <= 'Я') || firstChar == 'ё' || firstChar == 'Ё') { + prefix = string(firstChar) + matchRunes = matchRunes[1:] + } + } + + // Check if last character is not Cyrillic + if len(matchRunes) > 0 { + lastChar := matchRunes[len(matchRunes)-1] + if !((lastChar >= 'а' && lastChar <= 'я') || (lastChar >= 'А' && lastChar <= 'Я') || lastChar == 'ё' || lastChar == 'Ё') { + suffix = string(lastChar) + matchRunes = matchRunes[:len(matchRunes)-1] + } + } + + matchWord := string(matchRunes) + return prefix + preserveCase(matchWord, targetEndings[i].form) + suffix + }) + + if countBefore > 0 { + log.Printf(" Replaced %d occurrences of '%s' (%s) with '%s'", countBefore, ending.form, ending.case_, targetEndings[i].form) + } + } + } + + return text +} + +type CaseForm struct { + form string + case_ string +} + +func getRussianCaseEndings(word string) []CaseForm { + var forms []CaseForm + wordRunes := []rune(word) + + forms = append(forms, CaseForm{word, "nominative"}) + + if strings.HasSuffix(word, "ов") { + base := string(wordRunes[:len(wordRunes)-2]) + forms = append(forms, + CaseForm{base + "ова", "genitive"}, + CaseForm{base + "ову", "dative"}, + CaseForm{base + "ова", "accusative"}, + CaseForm{base + "овым", "instrumental"}, + CaseForm{base + "ове", "prepositional"}, + ) + } else if strings.HasSuffix(word, "ев") { + base := string(wordRunes[:len(wordRunes)-2]) + forms = append(forms, + CaseForm{base + "ева", "genitive"}, + CaseForm{base + "еву", "dative"}, + CaseForm{base + "ева", "accusative"}, + CaseForm{base + "евым", "instrumental"}, + CaseForm{base + "еве", 
"prepositional"}, + ) + } else if strings.HasSuffix(word, "ин") { + base := string(wordRunes[:len(wordRunes)-2]) + forms = append(forms, + CaseForm{base + "ина", "genitive"}, + CaseForm{base + "ину", "dative"}, + CaseForm{base + "ина", "accusative"}, + CaseForm{base + "иным", "instrumental"}, + CaseForm{base + "ине", "prepositional"}, + ) + } else if strings.HasSuffix(word, "ович") { + base := string(wordRunes[:len(wordRunes)-4]) + forms = append(forms, + CaseForm{base + "овича", "genitive"}, + CaseForm{base + "овичу", "dative"}, + CaseForm{base + "овича", "accusative"}, + CaseForm{base + "овичем", "instrumental"}, + CaseForm{base + "овиче", "prepositional"}, + ) + } else if strings.HasSuffix(word, "евич") { + base := string(wordRunes[:len(wordRunes)-4]) + forms = append(forms, + CaseForm{base + "евича", "genitive"}, + CaseForm{base + "евичу", "dative"}, + CaseForm{base + "евича", "accusative"}, + CaseForm{base + "евичем", "instrumental"}, + CaseForm{base + "евиче", "prepositional"}, + ) + } else if strings.HasSuffix(word, "ична") { + base := string(wordRunes[:len(wordRunes)-4]) + forms = append(forms, + CaseForm{base + "ичны", "genitive"}, + CaseForm{base + "ичне", "dative"}, + CaseForm{base + "ичну", "accusative"}, + CaseForm{base + "ичной", "instrumental"}, + CaseForm{base + "ичне", "prepositional"}, + ) + } else if strings.HasSuffix(word, "а") { + base := string(wordRunes[:len(wordRunes)-1]) + forms = append(forms, + CaseForm{base + "ы", "genitive"}, + CaseForm{base + "е", "dative"}, + CaseForm{base + "у", "accusative"}, + CaseForm{base + "ой", "instrumental"}, + CaseForm{base + "е", "prepositional"}, + ) + } else { + base := word + forms = append(forms, + CaseForm{base + "а", "genitive"}, + CaseForm{base + "у", "dative"}, + CaseForm{base + "а", "accusative"}, + CaseForm{base + "ом", "instrumental"}, + CaseForm{base + "е", "prepositional"}, + ) + } + + return forms +} + +func createCaseInsensitiveRegex(pattern string) *regexp.Regexp { + return 
// replacePattern replaces every match of regex in text with replacement,
// adapting the replacement's letter case to each individual match via
// preserveCase. A nil regex leaves text unchanged instead of panicking, which
// protects callers that hold a pattern without a compiled expression.
func replacePattern(text string, regex *regexp.Regexp, replacement string) string {
	if regex == nil {
		return text
	}
	return regex.ReplaceAllStringFunc(text, func(match string) string {
		return preserveCase(match, replacement)
	})
}

// preserveCase returns replacement written in the letter case of original:
// upper-cased when every letter of original is upper case, capitalized when
// original starts with an upper-case letter, lower-cased when every letter of
// original is lower case, and unchanged for any other mix.
func preserveCase(original, replacement string) string {
	switch {
	case isAllUpper(original):
		return strings.ToUpper(replacement)
	case isCapitalized(original):
		return capitalize(replacement)
	case isAllLower(original):
		return strings.ToLower(replacement)
	default:
		return replacement
	}
}

// allLetters reports whether s contains at least one letter and every letter
// in s satisfies ok. Non-letter runes are ignored.
func allLetters(s string, ok func(rune) bool) bool {
	seen := false
	for _, r := range s {
		if !unicode.IsLetter(r) {
			continue
		}
		if !ok(r) {
			return false
		}
		seen = true
	}
	return seen
}

// isAllUpper reports whether s has at least one letter and all of its letters
// are upper case.
func isAllUpper(s string) bool {
	return allLetters(s, unicode.IsUpper)
}

// isAllLower reports whether s has at least one letter and all of its letters
// are lower case. It previously tested unicode.IsUpper, which made it a
// duplicate of isAllUpper, so lower-case matches were never lower-cased.
func isAllLower(s string) bool {
	return allLetters(s, unicode.IsLower)
}

// isCapitalized reports whether the first rune of s is an upper-case letter.
// It returns false for the empty string.
func isCapitalized(s string) bool {
	runes := []rune(s)
	return len(runes) > 0 && unicode.IsUpper(runes[0])
}

// capitalize returns s with its first rune upper-cased and every following
// rune lower-cased. The empty string is returned unchanged.
func capitalize(s string) string {
	if s == "" {
		return s
	}
	runes := []rune(s)
	runes[0] = unicode.ToUpper(runes[0])
	for i := 1; i < len(runes); i++ {
		runes[i] = unicode.ToLower(runes[i])
	}
	return string(runes)
}
err != nil { + log.Fatalf("Failed to connect to RabbitMQ: %v", err) + } + defer conn.Close() + + ch, err := conn.Channel() + if err != nil { + log.Fatalf("Failed to open channel: %v", err) + } + defer ch.Close() + + err = common.SetupExchange(ch, config.RabbitMQ.ExchangeName, config.RabbitMQ.ExchangeType) + if err != nil { + log.Fatalf("Failed to setup exchange: %v", err) + } + + queueName := "queue_sentencesort" + routingKey := "task.sentencesort" + + queue, err := common.DeclareQueue(ch, queueName) + if err != nil { + log.Fatalf("Failed to declare queue: %v", err) + } + + err = common.BindQueue(ch, queue.Name, routingKey, config.RabbitMQ.ExchangeName) + if err != nil { + log.Fatalf("Failed to bind queue: %v", err) + } + + msgs, err := ch.Consume( + queue.Name, + "", + false, // auto-ack + false, + false, + false, + nil, + ) + if err != nil { + log.Fatalf("Failed to register consumer: %v", err) + } + + log.Printf("Sentence Sort Worker waiting for messages...") + + forever := make(chan bool) + + go func() { + for msg := range msgs { + processMessage(ch, config, msg) + } + }() + + <-forever +} + +func processMessage(ch *amqp.Channel, config *common.Config, delivery amqp.Delivery) { + receiveTime := time.Now() + + var task common.TaskMessage + err := json.Unmarshal(delivery.Body, &task) + if err != nil { + log.Printf("Failed to unmarshal task: %v", err) + delivery.Nack(false, false) + return + } + + log.Printf("Processing task %s, section %d", task.TaskID, task.SectionID) + + startTime := time.Now() + sentences := extractAndSortSentences(task.Text) + endTime := time.Now() + + processingDuration := endTime.Sub(startTime).Milliseconds() + + result := common.ResultMessage{ + TaskID: task.TaskID, + SectionID: task.SectionID, + WorkerType: WorkerType, + WorkerReceiveTime: receiveTime, + ProcessingStartTime: startTime, + ProcessingEndTime: endTime, + ProcessingDurationMs: processingDuration, + Result: common.SentenceSortResult{ + Sentences: sentences, + }, + } + + err = 
common.PublishMessage(ch, config.RabbitMQ.ExchangeName, config.RabbitMQ.ResultsRoutingKey, result) + if err != nil { + log.Printf("Failed to publish result: %v", err) + delivery.Nack(false, true) + return + } + + delivery.Ack(false) + log.Printf("Completed task %s, section %d, sentences: %d, processing time: %dms", + task.TaskID, task.SectionID, len(sentences), processingDuration) +} + +func extractAndSortSentences(text string) []common.SentenceInfo { + matches := sentenceRegex.FindAllString(text, -1) + + var sentences []common.SentenceInfo + for _, match := range matches { + sentence := strings.TrimSpace(match) + if len(sentence) > 0 { + sentences = append(sentences, common.SentenceInfo{ + Text: sentence, + Length: len(sentence), + }) + } + } + + return sentences +} diff --git a/src/workers/sentiment/requirements.txt b/src/workers/sentiment/requirements.txt new file mode 100644 index 0000000..d9721f0 --- /dev/null +++ b/src/workers/sentiment/requirements.txt @@ -0,0 +1,6 @@ +transformers==4.36.0 +torch==2.1.0 +pika==1.3.2 +sentencepiece==0.1.99 +numpy<2 +protobuf diff --git a/src/workers/sentiment/worker.py b/src/workers/sentiment/worker.py new file mode 100644 index 0000000..0477125 --- /dev/null +++ b/src/workers/sentiment/worker.py @@ -0,0 +1,147 @@ +#!/usr/bin/env python3 + +import json +import logging +import os +import time +from datetime import datetime +from transformers import pipeline +import pika + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +WORKER_TYPE = "sentiment" + +def load_config(): + with open('/config/config.json', 'r') as f: + return json.load(f) + +def connect_rabbitmq(url, max_retries=30): + for i in range(max_retries): + try: + params = pika.URLParameters(url) + connection = pika.BlockingConnection(params) + logger.info("Connected to RabbitMQ") + return connection + except Exception as e: + logger.warning(f"Failed to connect to RabbitMQ, retrying... 
({i+1}/{max_retries})") + time.sleep(2) + + raise Exception(f"Failed to connect to RabbitMQ after {max_retries} attempts") + +def analyze_sentiment(sentiment_pipeline, text): + try: + result = sentiment_pipeline(text[:512]) + + label = result[0]['label'] + score = result[0]['score'] + + sentiment_map = { + 'positive': 'positive', + 'negative': 'negative', + 'neutral': 'neutral', + 'Positive': 'positive', + 'Negative': 'negative', + 'Neutral': 'neutral' + } + + sentiment = sentiment_map.get(label, label.lower()) + + return { + 'sentiment': sentiment, + 'score': float(score) + } + except Exception as e: + logger.error(f"Error analyzing sentiment: {e}") + return { + 'sentiment': 'neutral', + 'score': 0.0 + } + +def process_message(channel, method, properties, body, sentiment_pipeline, config): + receive_time = datetime.utcnow() + + try: + task = json.loads(body) + logger.info(f"Processing task {task['task_id']}, section {task['section_id']}") + + start_time = datetime.utcnow() + sentiment_result = analyze_sentiment(sentiment_pipeline, task['text']) + end_time = datetime.utcnow() + + processing_duration_ms = int((end_time - start_time).total_seconds() * 1000) + + result = { + 'task_id': task['task_id'], + 'section_id': task['section_id'], + 'worker_type': WORKER_TYPE, + 'worker_receive_time': receive_time.isoformat() + 'Z', + 'processing_start_time': start_time.isoformat() + 'Z', + 'processing_end_time': end_time.isoformat() + 'Z', + 'processing_duration_ms': processing_duration_ms, + 'result': sentiment_result + } + + channel.basic_publish( + exchange=config['rabbitmq']['exchange_name'], + routing_key=config['rabbitmq']['results_routing_key'], + body=json.dumps(result), + properties=pika.BasicProperties( + delivery_mode=2, + content_type='application/json' + ) + ) + + channel.basic_ack(delivery_tag=method.delivery_tag) + logger.info(f"Completed task {task['task_id']}, section {task['section_id']}, sentiment: {sentiment_result['sentiment']}, processing time: 
{processing_duration_ms}ms") + + except Exception as e: + logger.error(f"Error processing message: {e}") + channel.basic_nack(delivery_tag=method.delivery_tag, requeue=True) + +def main(): + logger.info("Starting Sentiment Analysis Worker...") + + config = load_config() + + logger.info("Loading sentiment analysis model...") + sentiment_pipeline = pipeline( + "sentiment-analysis", + model="cardiffnlp/twitter-xlm-roberta-base-sentiment", + device=-1 + ) + logger.info("Model loaded successfully") + + connection = connect_rabbitmq(config['rabbitmq']['url']) + channel = connection.channel() + + exchange_name = config['rabbitmq']['exchange_name'] + channel.exchange_declare( + exchange=exchange_name, + exchange_type=config['rabbitmq']['exchange_type'], + durable=True + ) + + queue_name = "queue_sentiment" + routing_key = "task.sentiment" + + channel.queue_declare(queue=queue_name, durable=True) + channel.queue_bind( + exchange=exchange_name, + queue=queue_name, + routing_key=routing_key + ) + + channel.basic_qos(prefetch_count=1) + + def callback(ch, method, properties, body): + process_message(ch, method, properties, body, sentiment_pipeline, config) + + channel.basic_consume(queue=queue_name, on_message_callback=callback) + + logger.info("Sentiment Analysis Worker waiting for messages...") + channel.start_consuming() + +if __name__ == '__main__': + main() diff --git a/src/workers/topn/main.go b/src/workers/topn/main.go new file mode 100644 index 0000000..fba3700 --- /dev/null +++ b/src/workers/topn/main.go @@ -0,0 +1,139 @@ +package main + +import ( + "encoding/json" + "lab2/common" + "log" + "strings" + "time" + "unicode" + + amqp "github.com/rabbitmq/amqp091-go" +) + +const WorkerType = "topn" + +func main() { + log.Printf("Starting Top-N Words Worker...") + + config, err := common.LoadConfig("/config/config.json") + if err != nil { + log.Fatalf("Failed to load config: %v", err) + } + + conn, err := common.ConnectRabbitMQ(config.RabbitMQ.URL) + if err != nil { + 
log.Fatalf("Failed to connect to RabbitMQ: %v", err) + } + defer conn.Close() + + ch, err := conn.Channel() + if err != nil { + log.Fatalf("Failed to open channel: %v", err) + } + defer ch.Close() + + err = common.SetupExchange(ch, config.RabbitMQ.ExchangeName, config.RabbitMQ.ExchangeType) + if err != nil { + log.Fatalf("Failed to setup exchange: %v", err) + } + + queueName := "queue_topn" + routingKey := "task.topn" + + queue, err := common.DeclareQueue(ch, queueName) + if err != nil { + log.Fatalf("Failed to declare queue: %v", err) + } + + err = common.BindQueue(ch, queue.Name, routingKey, config.RabbitMQ.ExchangeName) + if err != nil { + log.Fatalf("Failed to bind queue: %v", err) + } + + msgs, err := ch.Consume( + queue.Name, + "", + false, // auto-ack + false, + false, + false, + nil, + ) + if err != nil { + log.Fatalf("Failed to register consumer: %v", err) + } + + log.Printf("Top-N Words Worker waiting for messages...") + + forever := make(chan bool) + + go func() { + for msg := range msgs { + processMessage(ch, config, msg) + } + }() + + <-forever +} + +func processMessage(ch *amqp.Channel, config *common.Config, delivery amqp.Delivery) { + receiveTime := time.Now() + + var task common.TaskMessage + err := json.Unmarshal(delivery.Body, &task) + if err != nil { + log.Printf("Failed to unmarshal task: %v", err) + delivery.Nack(false, false) + return + } + + log.Printf("Processing task %s, section %d", task.TaskID, task.SectionID) + + startTime := time.Now() + wordFreq := countWordFrequencies(task.Text) + endTime := time.Now() + + processingDuration := endTime.Sub(startTime).Milliseconds() + + result := common.ResultMessage{ + TaskID: task.TaskID, + SectionID: task.SectionID, + WorkerType: WorkerType, + WorkerReceiveTime: receiveTime, + ProcessingStartTime: startTime, + ProcessingEndTime: endTime, + ProcessingDurationMs: processingDuration, + Result: common.TopNWordsResult{ + Words: wordFreq, + }, + } + + err = common.PublishMessage(ch, 
// countWordFrequencies splits text into words and returns how many times each
// word occurs, keyed by the lower-cased word.
//
// A word is a maximal run of Unicode letters, numbers and combining marks;
// every other rune (whitespace, punctuation, symbols) is a separator. Marks
// are kept inside words so that decomposed accents ("e" followed by U+0301)
// and scripts that write vowel signs as marks (e.g. Devanagari) are not
// chopped into fragments. Marks at the start of a run have no base character
// to attach to and are dropped; a run made only of marks is not a word.
//
// The returned map is never nil; it is empty when text contains no words.
func countWordFrequencies(text string) map[string]int {
	freq := make(map[string]int)

	isSeparator := func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsNumber(r) && !unicode.IsMark(r)
	}

	for _, word := range strings.FieldsFunc(text, isSeparator) {
		// FieldsFunc never yields empty fields, but a field can still consist
		// only of stray combining marks; trimming leaves those empty.
		word = strings.TrimLeftFunc(word, unicode.IsMark)
		if word == "" {
			continue
		}
		freq[strings.ToLower(word)]++
	}

	return freq
}
// countWords returns the number of words in text.
//
// A word is a maximal run of Unicode letters, numbers and combining marks
// that contains at least one letter or number; every other rune (whitespace,
// punctuation, symbols) is a separator. Marks are kept inside words so that
// decomposed accents ("e" followed by U+0301) and scripts that write vowel
// signs as marks (e.g. Devanagari) are counted as one word instead of several
// fragments. A run made only of marks is not counted.
func countWords(text string) int {
	isSeparator := func(r rune) bool {
		return !unicode.IsLetter(r) && !unicode.IsNumber(r) && !unicode.IsMark(r)
	}

	count := 0
	for _, field := range strings.FieldsFunc(text, isSeparator) {
		// Every rune in a field is a letter, number or mark, so anything left
		// after trimming leading marks starts with a letter or number.
		if strings.TrimLeftFunc(field, unicode.IsMark) != "" {
			count++
		}
	}
	return count
}