Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
4e08b80
TIKA-4512 -- first steps
tballison Oct 10, 2025
5bad328
TIKA-4512 -- remove debugging class
tballison Oct 10, 2025
9287d53
Merge branch 'main' into TIKA-4512
tballison Oct 15, 2025
161f672
TIKA-4512 -- WIP do not merge
tballison Oct 17, 2025
08d9533
TIKA-4512 -- this works: mvn clean install -am -pl :tika-pipes-core-t…
tballison Oct 22, 2025
bf53b8e
TIKA-4512 -- checkpoint
tballison Oct 23, 2025
3ef978f
TIKA-4512 -- this moves testing to "on the classpath" except for tika…
tballison Oct 23, 2025
a3818e1
TIKA-4512 -- code improvements
tballison Oct 23, 2025
0929941
TIKA-4512 -- removes forking code in server
tballison Oct 23, 2025
4c93ae7
TIKA-4512 -- updates unit tests in tika-server-core for new behaviors
tballison Oct 27, 2025
2085c7e
TIKA-4512 -- updates unit tests in tika-server-standard for new behav…
tballison Oct 27, 2025
5ccb770
TIKA-4512 -- checkstyle
tballison Oct 27, 2025
c4f9b52
TIKA-4512 -- rename fetcherName to fetcherPluginId
tballison Oct 27, 2025
b06877b
TIKA-4512 -- clean up dependencies in file-system-fetcher
tballison Oct 27, 2025
d173308
Merge branch 'main' into TIKA-4519
tballison Oct 27, 2025
6eb8916
Merge branch 'main' into TIKA-4519
tballison Oct 27, 2025
18f0d77
TIKA-4519 - checkpoint
tballison Oct 29, 2025
883d936
Merge branch 'main' into TIKA-4519
tballison Nov 3, 2025
77682b9
TIKA-4519 -- checkpoint commit
tballison Nov 3, 2025
af7b252
TIKA-4519 -- tika-app tests work
tballison Nov 3, 2025
121eb7f
TIKA-4519 -- checkstyle
tballison Nov 3, 2025
57c4cf7
TIKA-4519 -- checkpoint moving into factories as plugins
tballison Nov 3, 2025
8ce960b
TIKA-4519 -- checkpoint
tballison Nov 4, 2025
d9875ca
TIKA-4519 -- checkpoint tika-pipes-tests work
tballison Nov 4, 2025
cfa57d8
TIKA-4519 -- tests through tika-app work
tballison Nov 4, 2025
05bb225
TIKA-4519 -- checkstyle through tika-app
tballison Nov 5, 2025
d73027b
TIKA-4519 -- tika-server now works
tballison Nov 5, 2025
088e08e
TIKA-4519 -- add file-system-pipes-iterator
tballison Nov 5, 2025
afd593c
TIKA-4519 -- checkpoint commit on pipesreporter
tballison Nov 6, 2025
40ba5e6
Merge branch 'main' into TIKA-4519
tballison Nov 6, 2025
e385b93
TIKA-4519 -- pipesreporter added
tballison Nov 6, 2025
b550351
TIKA-4519 -- checkstyle
tballison Nov 6, 2025
1b15799
jdbc pipes reporter and other fixes
tballison Nov 6, 2025
6ae3319
TIKA-4519 -- updates to add back includes and excludes
tballison Nov 6, 2025
a3b4cad
TIKA-4519 -- add opensearch reporter
tballison Nov 6, 2025
3255c30
TIKA-4519 -- opensearch integration tests now work
tballison Nov 7, 2025
7fc4ca4
TIKA-4519 -- checkstyle and other fixes
tballison Nov 10, 2025
0aaa7d2
TIKA-4519 -- further fixes
tballison Nov 10, 2025
d5d98b8
Merge branch 'main' into TIKA-4519
tballison Nov 10, 2025
fc4532b
Merge branch 'main' into TIKA-4519
tballison Nov 10, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
4 changes: 3 additions & 1 deletion pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,15 @@
<module>tika-bom</module>
<module>tika-core</module>
<module>tika-serialization</module>
<module>tika-plugins-core</module>
<module>tika-detectors</module>
<module>tika-parsers</module>
<module>tika-bundles</module>
<module>tika-xmp</module>
<module>tika-langdetect</module>
<module>tika-pipes</module>
<module>tika-grpc</module>

<!-- <module>tika-grpc</module> -->
<module>tika-app</module>
<module>tika-server</module>
<module>tika-integration-tests</module>
Expand Down
140 changes: 76 additions & 64 deletions tika-app/pom.xml
Original file line number Diff line number Diff line change
Expand Up @@ -89,75 +89,16 @@
</resource>
</resources>
<plugins>
<plugin>
<artifactId>maven-shade-plugin</artifactId>
<version>${maven.shade.version}</version>
<executions>
<execution>
<phase>package</phase>
<goals>
<goal>shade</goal>
</goals>
<configuration>
<createDependencyReducedPom>
false
</createDependencyReducedPom>
<artifactSet>
<excludes>
<exclude>org.apache.tika:tika-parsers-standard-package:jar:</exclude>
</excludes>
</artifactSet>
<filters>
<filter>
<artifact>*:*</artifact>
<excludes>
<exclude>META-INF/maven/plugin.xml</exclude>
<exclude>module-info.class</exclude>
<exclude>META-INF/*</exclude>
<exclude>LICENSE.txt</exclude>
<exclude>NOTICE.txt</exclude>
<exclude>CHANGES</exclude>
<exclude>README</exclude>
<exclude>builddef.lst</exclude>
<!-- https://issues.apache.org/jira/browse/TIKA-3650 -->
<exclude>javax/**/*</exclude>

</excludes>
</filter>
</filters>
<transformers>
<transformer implementation="org.apache.maven.plugins.shade.resource.ManifestResourceTransformer">
<mainClass>org.apache.tika.cli.TikaCLI</mainClass>
<manifestEntries>
<Multi-Release>true</Multi-Release>
</manifestEntries>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.ServicesResourceTransformer" />
<transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
<resource>META-INF/LICENSE</resource>
<file>target/classes/META-INF/LICENSE</file>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
<resource>META-INF/NOTICE</resource>
<file>target/classes/META-INF/NOTICE</file>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.IncludeResourceTransformer">
<resource>META-INF/DEPENDENCIES</resource>
<file>target/classes/META-INF/DEPENDENCIES</file>
</transformer>
<transformer implementation="org.apache.maven.plugins.shade.resource.AppendingTransformer">
<resource>META-INF/cxf/bus-extensions.txt</resource>
</transformer>
</transformers>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-jar-plugin</artifactId>
<configuration>
<archive>
<manifest>
<mainClass>org.apache.tika.cli.TikaCLI</mainClass>
<addClasspath>true</addClasspath>
<classpathPrefix>lib/</classpathPrefix>
</manifest>
<manifestEntries>
<Automatic-Module-Name>org.apache.tika.app</Automatic-Module-Name>
</manifestEntries>
Expand All @@ -174,6 +115,77 @@
</excludes>
</configuration>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<descriptors>
<descriptor>src/main/assembly/assembly.xml</descriptor>
</descriptors>
<appendAssemblyId>false</appendAssemblyId>
</configuration>
<executions>
<execution>
<id>make-assembly</id>
<phase>package</phase>
<goals>
<goal>single</goal>
</goals>
</execution>
</executions>
</plugin>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<executions>
<execution>
<id>copy-plugins</id>
<phase>generate-test-resources</phase>
<goals>
<goal>unpack</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/plugins</outputDirectory>
<artifactItems>
<artifactItem>
<groupId>org.apache.tika</groupId>
<artifactId>tika-fetcher-file-system</artifactId>
<version>${project.version}</version>
<type>zip</type>
<overWrite>true</overWrite>
</artifactItem>
<artifactItem>
<groupId>org.apache.tika</groupId>
<artifactId>tika-emitter-file-system</artifactId>
<version>${project.version}</version>
<type>zip</type>
<overWrite>true</overWrite>
</artifactItem>
<artifactItem>
<groupId>org.apache.tika</groupId>
<artifactId>tika-pipes-iterator-file-system</artifactId>
<version>${project.version}</version>
<type>zip</type>
<overWrite>true</overWrite>
</artifactItem>
</artifactItems>
</configuration>
</execution>
<execution>
<id>copy-dependencies</id>
<phase>package</phase>
<goals>
<goal>copy-dependencies</goal>
</goals>
<configuration>
<outputDirectory>${project.build.directory}/lib</outputDirectory>
<includeScope>runtime</includeScope>
<stripVersion>false</stripVersion>
<overWriteReleases>false</overWriteReleases>
<overWriteSnapshots>false</overWriteSnapshots>
</configuration>
</execution>
</executions>
</plugin>
</plugins>
</build>

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,27 +14,38 @@
See the License for the specific language governing permissions and
limitations under the License.
-->
<assembly>
<id>standalone</id>
<assembly xmlns="http://maven.apache.org/ASSEMBLY/2.1.1"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/ASSEMBLY/2.1.1 http://maven.apache.org/xsd/assembly-2.1.1.xsd">
<id>bin</id>
<formats>
<format>jar</format>
<format>zip</format>
</formats>
<includeBaseDirectory>false</includeBaseDirectory>

<dependencySets>
<dependencySet>
<outputDirectory></outputDirectory>
<unpack>true</unpack>
<unpackOptions>
<excludes>
<exclude>META-INF/MANIFEST.MF</exclude>
<exclude>META-INF/README*</exclude>
<exclude>META-INF/NOTICE*</exclude>
<exclude>META-INF/LICENSE*</exclude>
<exclude>README*</exclude>
<exclude>NOTICE*</exclude>
<exclude>LICENSE*</exclude>
</excludes>
</unpackOptions>
<outputDirectory>lib</outputDirectory>
<useProjectArtifact>false</useProjectArtifact>
<unpack>false</unpack>
<scope>runtime</scope>
</dependencySet>
</dependencySets>
</assembly>
<fileSets>
<fileSet>
<directory>${project.build.directory}</directory>
<outputDirectory>/</outputDirectory>
<includes>
<include>*.jar</include>
</includes>
<excludes>
<exclude>*-sources.jar</exclude>
<exclude>*-javadoc.jar</exclude>
</excludes>
</fileSet>
<fileSet>
<directory>${project.build.directory}/plugins</directory>
<outputDirectory>plugins</outputDirectory>
</fileSet>
</fileSets>
</assembly>
2 changes: 0 additions & 2 deletions tika-app/src/main/java/org/apache/tika/cli/AsyncHelper.java
Original file line number Diff line number Diff line change
Expand Up @@ -32,8 +32,6 @@ public static String[] translateArgs(String[] args) {
String c = arg.substring(TIKA_CONFIG_KEY.length());
argList.add("-c");
argList.add(c);
} else if (arg.equals("-a")) {
//do nothing
} else {
argList.add(args[i]);
}
Expand Down
5 changes: 5 additions & 0 deletions tika-app/src/main/java/org/apache/tika/cli/TikaCLI.java
Original file line number Diff line number Diff line change
Expand Up @@ -270,6 +270,11 @@ private static void async(String[] args) throws Exception {
TikaAsyncCLI.main(args);
return;
}
// Shortcut: exactly two arguments, the first ending in ".xml" and the second
// ending in ".json", are passed to TikaAsyncCLI unchanged and we return
// before any temp config file is created below.
if (args.length == 2 && args[0].endsWith(".xml") && args[1].endsWith(".json")) {
    TikaAsyncCLI.main(args);
    return;
}
//TODO -- are there other shortcuts?
Path tmpConfig = null;
try {
tmpConfig = Files.createTempFile("tika-config-", ".xml");
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@ public class AsyncHelperTest {

@Test
public void testBasic() throws Exception {
String[] args = new String[]{"-a", "--config=blah.xml", "-i", "input.docx", "-o", "output/dir"};
String[] expected = new String[]{"-c", "blah.xml", "-i", "input.docx", "-o", "output/dir"};
String[] args = new String[]{"-a", "blah.json", "--config=blah.xml", "-i", "input.docx", "-o", "output/dir"};
String[] expected = new String[]{"-a", "blah.json", "-c", "blah.xml", "-i", "input.docx", "-o", "output/dir"};
assertArrayEquals(expected, AsyncHelper.translateArgs(args));
}
}
Loading