diff --git a/data/en/plugins_list.json b/data/en/plugins_list.json index c79a2138..fe883765 100644 --- a/data/en/plugins_list.json +++ b/data/en/plugins_list.json @@ -1,9 +1,1151 @@ { + "Database": { + "Name": "Database", + "Display Name": "Database", + "Type": "Source", + "Description": "Writes records to a database table. Each record will be written to a row in the table.", + "Icon": "N/A" + }, + "DatabaseQuery": { + "Name": "DatabaseQuery", + "Display Name": "DatabaseQuery", + "Type": "Action", + "Icon": "N/A", + "Description": "Runs a database query at the end of the pipeline run.\nCan be configured to run only on success, only on failure, or always at the end of the run." + }, + "S3": { + "Name": "S3", + "Display Name": "S3", + "Type": "Source", + "Icon": "", + "Description": "Batch source to use Amazon S3 as a Source." + }, + "File": { + "Name": "File", + "Display Name": "File", + "Type": "Sink", + "Icon": "N/A", + "Description": "Writes to a filesystem in various formats format." + }, + "Twitter": { + "Name": "Twitter", + "Display Name": "Twitter", + "Type": "Source", + "Icon": "N/A", + "Description": "Samples tweets in real-time through Spark streaming. Output records will have this schema:" + }, + "HTTPPoller": { + "Name": "HTTPPoller", + "Display Name": "HTTPPoller", + "Type": "Source", + "Icon": "N/A", + "Description": null + }, + "HDFS": { + "Name": "HDFS", + "Display Name": "HDFS", + "Type": "Sink", + "Icon": "N/A", + "Description": null + }, + "MLPredictor": { + "Name": "MLPredictor", + "Display Name": "MLPredictor", + "Type": "Analytics", + "Description": "Uses a model trained by the ModelTrainer plugin to add a prediction field to incoming records.\nThe same features used to train the model must be present in each input record, but input records can also\ncontain additional non-feature fields. If the trained model uses categorical features,\nand if the record being predicted contains new categories, that record will be dropped.\nFor example, suppose categorical feature 'city' was used to train a model that predicts housing prices.\nIf an incoming record has 'New York' as the city, but 'New York' was not in the training set,\nthat record will be dropped.", + "Icon": "" + }, + "HTTPCallback": { + "Name": "HTTPCallback", + "Display Name": "HTTPCallback", + "Type": "Action", + "Description": "Performs an HTTP request at the end of a pipeline run.", + "Icon": "N/A" + }, + "HBase": { + "Name": "HBase", + "Display Name": "HBase", + "Type": "Source", + "Description": "Batch source that reads from a column family in an HBase table.\nThis source differs from the Table source in that it does not use a CDAP dataset,\nbut reads directly from HBase.", + "Icon": "N/A" + }, + "Kafka": { + "Name": "Kafka", + "Display Name": "Kafka", + "Type": "alert", + "Description": null, + "Icon": "" + }, + "KafkaAlerts": { + "Name": "KafkaAlerts", + "Display Name": "Kafka Alert Publisher", + "Type": "Alert Publisher", + "Description": "Kafka Alert Publisher that allows you to publish alerts to kafka as json objects.\nThe plugin internally uses kafka producer apis to publish alerts. \nThe plugin allows to specify kafka topic to use for publishing and other additional\nkafka producer properties. This plugin uses kafka 0.8.2 java apis.", + "Icon": "" + }, + "Conditional": { + "Name": "Conditional", + "Display Name": "Conditional", + "Type": "Condition", + "Description": "A control flow plugin that allows conditional execution within\npipelines. 
The conditions are specified as expressions and the\nvariables could include values specified as runtime arguments of\nthe pipeline, token from plugins prior to the condition and global\nthat includes global information about pipeline like stage, pipeline,\nlogical start time and plugin.", + "Icon": "" + }, + "Wrangler": { + "Name": "Wrangler", + "Display Name": "Wrangler", + "Type": "Transform", + "Icon": "N/A", + "Description": "This plugin applies data transformation directives on your data records. The directives\nare generated either through an interactive user interface or by manual entry into the\nplugin." + }, + "NullFieldSplitter": { + "Name": "NullFieldSplitter", + "Display Name": "NullFieldSplitter", + "Type": "Transform", + "Icon": "", + "Description": null + }, + "XMLParser": { + "Name": "XMLParser", + "Display Name": "XMLParser", + "Type": "Transform", + "Icon": "N/A", + "Description": "The XML Parser Transform uses XPath to extract fields from a complex XML event. This plugin should generally be used\nin conjunction with the XML Reader Batch Source. The XML Reader will provide individual events to the XML Parser,\nwhich will be responsible for extracting fields from the events and mapping them to the output schema." + }, + "XMLMultiParser": { + "Name": "XMLMultiParser", + "Display Name": "XMLMultiParser", + "Type": "Transform", + "Icon": "N/A", + "Description": "The XML Multi Parser Transform uses XPath to extract fields from an XML document. It will generate records from\nthe children of the element specified by the XPath. If there is some error parsing the document or building the record,\nthe problematic input record will be dropped." + }, + "Hasher": { + "Name": "Hasher", + "Display Name": "Hasher", + "Type": "Transform", + "Icon": "N/A", + "Description": null + }, + "UnionSplitter": { + "Name": "UnionSplitter", + "Display Name": "UnionSplitter", + "Type": "Transform", + "Description": "The union splitter is used to split data by a union schema, so that type specific logic can be done downstream.", + "Icon": "" + }, + "JSONFormatter": { + "Name": "JSONFormatter", + "Display Name": "JSONFormatter", + "Type": "Transform", + "Icon": "N/A", + "Description": null + }, + "CloneRecord": { + "Name": "CloneRecord", + "Display Name": "Record Duplicator", + "Type": "Transform", + "Description": "Makes a copy of every input record received for a configured number of times on the output. \nThis transform does not change any record fields or types. It's an identity transform.", + "Icon": "N/A" + }, + "ValueMapper": { + "Name": "ValueMapper", + "Display Name": "ValueMapper", + "Type": "Transform", + "Icon": "N/A", + "Description": "Value Mapper is a transform plugin that maps string values of a field in the input record\nto a mapping value using a mapping dataset." + }, + "XMLToJSON": { + "Name": "XMLToJSON", + "Display Name": "XML to Json String", + "Type": "Transform", + "Description": "Accepts a field that contains a properly-formatted XML string and \noutputs a properly-formatted JSON string version of the data. This is \nmeant to be used with the Javascript transform for the parsing of \ncomplex XML documents into parts. 
Once the XML is a JSON string, you \ncan convert it into a Javascript object using:", + "Icon": "N/A" + }, + "Decryptor": { + "Name": "Decryptor", + "Display Name": "Field Decrypter", + "Type": "Transform", + "Description": "Decrypts one or more fields in input records using a keystore \nthat must be present on all nodes of the cluster.", + "Icon": "N/A" + }, + "Decompressor": { + "Name": "Decompressor", + "Display Name": "Field Decompressor", + "Type": "Transform", + "Description": null, + "Icon": "N/A" + }, + "Encryptor": { + "Name": "Encryptor", + "Display Name": "Encryptor", + "Type": "Transform", + "Icon": "N/A", + "Description": "Encrypts one or more fields in input records using a java keystore \nthat must be present on all nodes of the cluster." + }, + "Normalize": { + "Name": "Normalize", + "Display Name": "Normalize", + "Type": "Transform", + "Icon": "N/A", + "Description": "Normalize is a transform plugin that breaks one source row into multiple target rows.\nAttributes stored in the columns of a table or a file may need to be broken into multiple\nrecords: for example, one record per column attribute. In general, the plugin allows the\nconversion of columns to rows." + }, + "Encoder": { + "Name": "Encoder", + "Display Name": "Field Encoder", + "Type": "Transform", + "Description": null, + "Icon": "N/A" + }, + "CSVParser": { + "Name": "CSVParser", + "Display Name": "CSVParser", + "Type": "Transform", + "Icon": "N/A", + "Description": null + }, + "Decoder": { + "Name": "Decoder", + "Display Name": "Decoder", + "Type": "Transform", + "Icon": "N/A", + "Description": null + }, + "CSVFormatter": { + "Name": "CSVFormatter", + "Display Name": "CSVFormatter", + "Type": "Transform", + "Description": null, + "Icon": "N/A" + }, + "Compressor": { + "Name": "Compressor", + "Display Name": "Compressor", + "Type": "Transform", + "Icon": "N/A", + "Description": "Compresses configured fields. Multiple fields can be specified to be compressed using different compression algorithms.\nPlugin supports SNAPPY, ZIP, and GZIP types of compression of fields." + }, + "JSONParser": { + "Name": "JSONParser", + "Display Name": "JSONParser", + "Type": "Transform", + "Description": "Parses an input JSON event into a record. The input JSON event could be either a map of\nstring fields to values or it could be a complex nested JSON structure. 
The plugin allows you\nto express JSON paths for extracting fields from complex nested input JSON.", + "Icon": "N/A" + }, + "Deduplicate": { + "Name": "Deduplicate", + "Display Name": "Deduplicate", + "Type": "Analytics", + "Description": null, + "Icon": "N/A" + }, + "SnapshotParquet": { + "Name": "SnapshotParquet", + "Display Name": "SnapshotParquet", + "Type": "Source", + "Description": "A batch source that reads from a corresponding SnapshotParquet sink.\nThe source will only read the most recent snapshot written to the sink.", + "Icon": "N/A" + }, + "SSH": { + "Name": "SSH", + "Display Name": "Remote Program Executor", + "Type": "Action", + "Description": "Establishes an SSH connection with remote machine to execute command on that machine.", + "Icon": "N/A" + }, + "Cube": { + "Name": "Cube", + "Display Name": "Cube", + "Type": "Sink", + "Description": "Batch sink that writes data to a Cube dataset.", + "Icon": "N/A" + }, + "TPFSParquet": { + "Name": "TPFSParquet", + "Display Name": "TPFSParquet", + "Type": "Sink", + "Description": null, + "Icon": "N/A" + }, + "Table": { + "Name": "Table", + "Display Name": "Table", + "Type": "Source", + "Icon": "", + "Description": "Outputs the entire contents of a CDAP Table each batch interval. The Table contents will be refreshed\nat configurable intervals." + }, + "SnapshotAvro": { + "Name": "SnapshotAvro", + "Display Name": "SnapshotAvro", + "Type": "Sink", + "Icon": "N/A", + "Description": "A batch sink for a PartitionedFileSet that writes snapshots of data as a new\npartition. Data is written in Avro format. A corresponding SnapshotAvro source\ncan be used to read only the most recently written snapshot." + }, + "Email": { + "Name": "Email", + "Display Name": "Email", + "Type": "Action", + "Description": "Sends an email at the end of a pipeline run.", + "Icon": "N/A" + }, + "FTP": { + "Name": "FTP", + "Display Name": "FTP", + "Type": "Source", + "Icon": "N/A", + "Description": "Batch source for an FTP or SFTP source. Prefix of the path ('ftp://...' or 'sftp://...') determines the source server\ntype, either FTP or SFTP." + }, + "TMS": { + "Name": "TMS", + "Display Name": "TMS", + "Type": "Alert Publisher", + "Icon": "", + "Description": "Publishes alerts to the CDAP Transactional Messaging System (TMS) as json objects. The plugin\nallows you to specify the topic and namespace to publish to, as well as a rate limit for the\nmaximum number of alerts to publish per second." + }, + "XMLReader": { + "Name": "XMLReader", + "Display Name": "XMLReader", + "Type": "Source", + "Description": "The XML Reader plugin is a source plugin that allows users to read XML files stored on HDFS.", + "Icon": "N/A" + }, + "FileDelete": { + "Name": "FileDelete", + "Display Name": "FileDelete", + "Type": "Action", + "Description": "Deletes a file or files.", + "Icon": "" + }, + "HDFSMove": { + "Name": "HDFSMove", + "Display Name": "HDFSMove", + "Type": "Action", + "Description": "Moves a file or files within an HDFS cluster.", + "Icon": "N/A" + }, + "Joiner": { + "Name": "Joiner", + "Display Name": "Joiner", + "Type": "Analytics", + "Icon": "N/A", + "Description": null + }, + "LogParser": { + "Name": "LogParser", + "Display Name": "LogParser", + "Type": "Transform", + "Icon": "N/A", + "Description": "Parses logs from any input source for relevant information such as URI, IP,\nbrowser, device, HTTP status code, and timestamp." 
+ }, + "JavaScript": { + "Name": "JavaScript", + "Display Name": "JavaScript", + "Type": "Transform", + "Description": "Executes user-provided JavaScript that transforms one record into zero or more records.\nInput records are converted into JSON objects which can be directly accessed in\nJavaScript. The transform expects to receive a JSON object as input, which it can\nprocess and emit zero or more records or emit error using the provided emitter object.", + "Icon": "N/A" + }, + "KVTable": { + "Name": "KVTable", + "Display Name": "KVTable", + "Type": "Source", + "Description": "Reads the entire contents of a KeyValueTable, outputting records with a 'key' field and a\n'value' field. Both fields are of type bytes.", + "Icon": "N/A" + }, + "HDFSDelete": { + "Name": "HDFSDelete", + "Display Name": "HDFSDelete", + "Type": "Action", + "Icon": "N/A", + "Description": "Deletes a file or files within an HDFS cluster." + }, + "FileMove": { + "Name": "FileMove", + "Display Name": "FileMove", + "Type": "Action", + "Icon": "", + "Description": "Moves a file or files." + }, + "RowDenormalizer": { + "Name": "RowDenormalizer", + "Display Name": "RowDenormalizer", + "Type": "Analytics", + "Description": "Converts raw data into denormalized data based on a key column. User is able to specify the list of fields that should be used in the denormalized record, with an option to use an alias for the output field name. For example, 'ADDRESS' in the input is mapped to 'addr' in the output schema. ", + "Icon": "N/A" + }, + "TPFSAvro": { + "Name": "TPFSAvro", + "Display Name": "Avro Time Partitioned Dataset", + "Type": "Sink", + "Description": null, + "Icon": "N/A" + }, + "Distinct": { + "Name": "Distinct", + "Display Name": "Distinct", + "Type": "Analytics", + "Description": "De-duplicates input records so that all output records are distinct.\nCan optionally take a list of fields, which will project out all other fields and perform a distinct on just those fields.", + "Icon": "N/A" + }, + "Stream": { + "Name": "Stream", + "Display Name": "CDAP Stream", + "Type": "Source", + "Description": null, + "Icon": "N/A" + }, + "PythonEvaluator": { + "Name": "PythonEvaluator", + "Display Name": "PythonEvaluator", + "Type": "Transform", + "Icon": "N/A", + "Description": "Executes user-provided python code that transforms one record into zero or more records.\nEach input record is converted into a dictionary which can be directly accessed in\npython. The transform expects to receive a dictionary as input, which it can\nprocess and emit zero or more transformed dictionaries, or emit an error dictionary using the provided emitter object." + }, + "Window": { + "Name": "Window", + "Display Name": "Window", + "Type": "Analytics", + "Icon": "N/A", + "Description": "The Window plugin is used to window a part of a streaming pipeline." + }, + "TPFSOrc": { + "Name": "TPFSOrc", + "Display Name": "TPFSOrc", + "Type": "Sink", + "Icon": "N/A", + "Description": null + }, + "ErrorCollector": { + "Name": "ErrorCollector", + "Display Name": "ErrorCollector", + "Type": "Error Handler", + "Description": "The ErrorCollector plugin takes errors emitted from the previous stage and flattens them by adding\nthe error message, code, and stage to the record and outputting the result.", + "Icon": "N/A" + }, + "StructuredRecordToGenericRecord": { + "Name": "StructuredRecordToGenericRecord", + "Display Name": "StructuredRecordToGenericRecord", + "Type": "Transform", + "Description": "Transforms a StructuredRecord into an Avro GenericRecord. 
", + "Icon": "N/A" + }, + "SnapshotText": { + "Name": "SnapshotText", + "Display Name": "SnapshotText", + "Type": "Sink", + "Description": "A batch sink for a PartitionedFileSet that writes snapshots of data as a new\npartition. Data is written in Text format. ", + "Icon": "N/A" + }, + "WindowsShareCopy": { + "Name": "WindowsShareCopy", + "Display Name": "WindowsShareCopy", + "Type": "Action", + "Description": "Copies a file or files on a Microsoft Windows share to an HDFS directory.", + "Icon": "N/A" + }, + "Validator": { + "Name": "Validator", + "Display Name": "Validator", + "Type": "Transform", + "Description": "Validates a record, writing to an error dataset if the record is invalid.\nOtherwise it passes the record on to the next stage." + }, + "GroupByAggregate": { + "Name": "GroupByAggregate", + "Display Name": "Group By", + "Type": "Analytics", + "Description": null, + "Icon": "N/A" + }, + "Excel": { + "Name": "Excel", + "Display Name": "Excel", + "Type": "Source", + "Icon": "N/A", + "Description": "The Excel plugin provides user the ability to read data from one or more Excel file(s)." + }, + "Projection": { + "Name": "Projection", + "Display Name": "Projection", + "Type": "Transform", + "Icon": "N/A", + "Description": "The Projection transform lets you drop, keep, rename, and cast fields to a different type.\nFields are first dropped based on the drop or keep field, then cast, then renamed." + }, + "BigQueryTable": { + "Name": "BigQueryTable", + "Display Name": "Google Big Query Table", + "Type": "Sink", + "Description": "This plugins exports a bigquery table as sink to be ingested into the processing pipeline.\nPlugin requires a service account to access the bigquery table. In order to configure\nthe service account visit https://cloud.google.com. Make sure you provide right permissions\nto service account for accessing BigQuery API.", + "Icon": "" + }, + "GCSFile": { + "Name": "GCSFile", + "Display Name": "Google Cloud Storage File", + "Type": "Source", + "Icon": "" + }, + "GCSBucketCreate": { + "Name": "GCSBucketCreate", + "Display Name": "Google Cloud Storage Path Create", + "Type": "Action", + "Description": "This plugin is used for creating directories on Google Cloud Storage (GCS).", + "Icon": "" + }, + "GCS": { + "Name": "GCS", + "Display Name": "GCS", + "Type": "Sink", + "Icon": "", + "Description": "This plugin writes records to one or more files in a directory on Google Cloud Storage.\nFiles can be written in various formats such as csv, avro, parquet, and json." 
+ }, + "GooglePublisher": { + "Name": "GooglePublisher", + "Display Name": "GooglePublisher", + "Type": "Sink", + "Description": null, + "Icon": "" + }, + "Spanner": { + "Name": "Spanner", + "Display Name": "Spanner", + "Type": "Source", + "Description": "This source reads from a Google Cloud Spanner table.\nCloud Spanner is a fully managed, mission-critical, relational database service that offers transactional\nconsistency at global scale, schemas, SQL (ANSI 2011 with extensions),\nand automatic, synchronous replication for high availability.", + "Icon": "" + }, + "GCSBucketDelete": { + "Name": "GCSBucketDelete", + "Display Name": "Google Cloud Storage Path Delete", + "Type": "Action", + "Description": "This plugin is used for deleting directories on Google Cloud Storage (GCS).", + "Icon": "" + }, + "SpeechToText": { + "Name": "SpeechToText", + "Display Name": "Google Cloud Speech-to-Text", + "Type": "Transform", + "Description": "This plugin converts audio files to text by using Google Cloud Speech-to-Text.", + "Icon": "" + }, + "GoogleSubscriber": { + "Name": "GoogleSubscriber", + "Display Name": "GoogleSubscriber", + "Type": "Source", + "Icon": "", + "Description": null + }, + "Repartitioner": { + "Name": "Repartitioner", + "Display Name": "Repartitioner", + "Type": "Analytics", + "Icon": "N/A", + "Description": "This plugins re-partitions a Spark RDD." + }, + "S3ToRedshift": { + "Name": "S3ToRedshift", + "Display Name": "S3ToRedshift", + "Type": "Action", + "Icon": "N/A", + "Description": "S3ToRedshift Action that will load the data from AWS S3 bucket into the AWS Redshift table." + }, + "Run": { + "Name": "Run", + "Display Name": "Run", + "Type": "Transform", + "Icon": "N/A", + "Description": "Runs an executable binary which is installed and available on the local filesystem of the Hadoop nodes. Run transform\nplugin allows the user to read the structured record as input and returns the output record, to be further processed\ndownstream in the pipeline." + }, + "WholeFileReader": { + "Name": "WholeFileReader", + "Display Name": "WholeFileReader", + "Type": "Source", + "Icon": "N/A", + "Description": "Whole file source reads the entire file. It's highly recommended not\nto use this plugin when the files are very large and splittable." + }, + "AmazonS3Client": { + "Name": "AmazonS3Client", + "Display Name": "AmazonS3Client", + "Type": "Action", + "Icon": "", + "Description": "The Amazon S3 Client Action is used to work with S3 buckets and objects before or after the execution of a pipeline." + }, + "StateRestore": { + "Name": "StateRestore", + "Display Name": "StateRestore", + "Type": "Action", + "Icon": "N/A", + "Description": "Description" + }, + "NGramTransform": { + "Name": "NGramTransform", + "Display Name": "NGramTransform", + "Type": "Analytics", + "Icon": "N/A", + "Description": "Transforms the input features into n-grams, where n-gram is a sequence of n tokens (typically words) for some integer 'n'." + }, + "Decompress": { + "Name": "Decompress", + "Display Name": "Decompress", + "Type": "Action", + "Icon": "", + "Description": null + }, + "VerticaBulkExportAction": { + "Name": "VerticaBulkExportAction", + "Display Name": "VerticaBulkExportAction", + "Type": "Action", + "Icon": "N/A", + "Description": "Bulk exports data in a vertica table into a file." 
+ }, + "RedshiftToS3": { + "Name": "RedshiftToS3", + "Display Name": "RedshiftToS3", + "Type": "Action", + "Icon": "N/A", + "Description": null + }, + "FileAppender": { + "Name": "FileAppender", + "Display Name": "File Appender", + "Type": "Sink", + "Description": "Writes to a CDAP FileSet in text format. HDFS append must be enabled for this to work.\nOne line is written for each record sent to the sink.\nAll record fields are joined using a configurable separator.\nEach time a batch is written, the sink will examine all existing files in the output directory.\nIf there are any files that are smaller in size than the size threshold, or more recent than\nthe age threshold, new data will be appended to those files instead of written to new files.", + "Icon": "" + }, + "CDCDatabase": { + "Name": "CDCDatabase", + "Display Name": "CDCDatabase", + "Type": "Source", + "Icon": "N/A", + "Description": "This plugin reads Change Data Capture (CDC) events from a Golden Gate Kafka topic." + }, + "CDCHBase": { + "Name": "CDCHBase", + "Display Name": "CDCHBase", + "Type": "Sink", + "Icon": "N/A", + "Description": "This plugin takes input from a CDC source and writes the changes to HBase. \nIt will write to the HBase instance running on the cluster." + }, + "CDCKudu": { + "Name": "CDCKudu", + "Display Name": "CDCKudu", + "Type": "Sink", + "Icon": "N/A", + "Description": "This plugin takes input from a CDC source and writes the changes to Kudu." + }, + "GoldenGateNormalizer": { + "Name": "GoldenGateNormalizer", + "Display Name": "GoldenGateNormalizer", + "Type": "Transform", + "Icon": "N/A" + }, + "MQTT": { + "Name": "MQTT", + "Display Name": "MQTT", + "Type": "Source", + "Icon": "N/A", + "Description": "The MQTT Streaming Source allows you to subscribe to an MQTT broker in a streaming context. You\nspecify the topic to subscribe to as an MQTT client." + }, + "DataProfiler": { + "Name": "DataProfiler", + "Display Name": "DataProfiler", + "Type": "Analytics", + "Icon": "", + "Description": "Calculates statistics for each input field.\nFor every field, a total count and null count will be calculated.\nFor numeric fields, min, max, mean, stddev, zero count, positive count, and negative count will be calculated.\nFor string fields, min length, max length, mean length, and empty count will be calculated.\nFor boolean fields, true and false counts will be calculated.\nWhen calculating means, only non-null values are considered." + }, + "Elasticsearch": { + "Name": "Elasticsearch", + "Display Name": "Elasticsearch", + "Type": "Source", + "Description": "Takes the Structured Record from the input source and converts it to a JSON string, then indexes it in\nElasticsearch using the index, type, and idField specified by the user. The Elasticsearch server should\nbe running prior to creating the application.", + "Icon": "N/A" + }, + "KinesisSource": { + "Name": "KinesisSource", + "Display Name": "KinesisSource", + "Type": "Source", + "Icon": "N/A", + "Description": "Spark streaming source that reads from AWS Kinesis streams." + }, + "PDFExtractor": { + "Name": "PDFExtractor", + "Display Name": "PDFExtractor", + "Type": "Transform", + "Icon": "N/A", + "Description": null + }, + "Kudu": { + "Name": "Kudu", + "Display Name": "Kudu", + "Type": "Source", + "Icon": "N/A", + "Description": "CDAP Plugin for reading data from Apache Kudu table." 
+ }, + "ParquetDynamicPartitionedDataset": { + "Name": "ParquetDynamicPartitionedDataset", + "Display Name": "ParquetDynamicPartitionedDataset", + "Type": "Sink", + "Icon": "N/A", + "Description": null + }, + "AvroDynamicPartitionedDataset": { + "Name": "AvroDynamicPartitionedDataset", + "Display Name": "AvroDynamicPartitionedDataset", + "Type": "Sink", + "Icon": "N/A", + "Description": null + }, + "ORCDynamicPartitionedDataset": { + "Name": "ORCDynamicPartitionedDataset", + "Display Name": "ORCDynamicPartitionedDataset", + "Type": "Sink", + "Icon": "N/A", + "Description": null + }, + "DecisionTreePredictor": { + "Name": "DecisionTreePredictor", + "Display Name": "DecisionTreePredictor", + "Type": "Analytics", + "Icon": "N/A", + "Description": "Loads a Decision Tree Regression model from a FileSet and uses it to label the records based on the predicted values." + }, + "DecisionTreeTrainer": { + "Name": "DecisionTreeTrainer", + "Display Name": "DecisionTreeTrainer", + "Type": "Sink", + "Icon": "N/A", + "Description": "Trains a regression model based upon a particular label and features of a record. Saves this model to a FileSet." + }, + "S3FileMetadataSource": { + "Name": "S3FileMetadataSource", + "Display Name": "S3FileMetadataSource", + "Type": "Source", + "Icon": "", + "Description": "The S3 File Metadata plugin is a source plugin that allows users to read file metadata from an S3 Filesystem." + }, + "FileMetadataSource": { + "Name": "FileMetadataSource", + "Display Name": "FileMetadataSource", + "Type": "Source", + "Icon": "", + "Description": "The File Metadata plugin is a source plugin that allows users to read file metadata from a local HDFS or a local filesystem." + }, + "FileCopySink": { + "Name": "FileCopySink", + "Display Name": "FileCopySink", + "Type": "Sink", + "Icon": "", + "Description": "The File Copy plugin is a sink plugin that takes file metadata records as inputs and copies the files into the local HDFS or the local filesystem." + }, + "S3FileCopySink": { + "Name": "S3FileCopySink", + "Display Name": "Amazon S3 Whole File Copier", + "Type": "Sink", + "Description": "The S3 File Copy plugin is a sink plugin that takes file metadata records as inputs and copies the files into an Amazon S3 filesystem.", + "Icon": "" + }, + "Sampling": { + "Name": "Sampling", + "Display Name": "Sampling", + "Type": "Analytics", + "Description": "Sampling a large dataset flowing through this plugin to pull random records. Supports two types of sampling\ni.e, Systematic Sampling and Reservoir Sampling.", + "Icon": "" + }, + "DynTable": { + "Name": "DynTable", + "Display Name": "DynTable", + "Type": "Sink", + "Icon": "", + "Description": "This plugin supports writing dynamic schemas record to CDAP Dataset Table. In addition to writing dynamic schema tables, it also support regular structured records to be written to Tables." + }, + "DynHBase": { + "Name": "DynHBase", + "Display Name": "HBase Table with Dynamic Schema", + "Type": "Sink", + "Description": "This plugin supports writing dynamic schemas record to local or remote HBase Table. 
In addition to writing dynamic schema tables, it also support regular structured records to be written to Tables.", + "Icon": "" + }, + "LoadToSnowflake": { + "Name": "LoadToSnowflake", + "Display Name": "LoadToSnowflake", + "Type": "Action", + "Icon": "N/A", + "Description": null + }, + "NaiveBayesClassifier": { + "Name": "NaiveBayesClassifier", + "Display Name": "NaiveBayesClassifier", + "Type": "Analytics", + "Icon": "N/A", + "Description": "Loads a Naive Bayes model from a file of a FileSet dataset and uses it to classify records." + }, + "NaiveBayesTrainer": { + "Name": "NaiveBayesTrainer", + "Display Name": "NaiveBayesTrainer", + "Type": "Sink", + "Icon": "N/A", + "Description": "Using a Naive Bayes algorithm, trains a model based upon a particular label and text field of a record.\nSaves this model to a file in a FileSet dataset." + }, + "ArgumentSetter": { + "Name": "ArgumentSetter", + "Display Name": "HTTP Argument Setter", + "Type": "Action", + "Description": "Performs an HTTP request some endpoint to get a driver specification. Based on the spec,\nit will make another call to a nebula endpoint to get data about the dataset, which it will\nuse to set 'input.path', 'input.properties', 'directives', and 'output.schema' arguments\nthat can be used later on in the pipeline through macros.", + "Icon": "" + }, + "DataFactoryDriver": { + "Name": "DataFactoryDriver", + "Display Name": "DataFactoryDriver", + "Type": "Action", + "Description": "Performs an HTTP request some endpoint to get a driver specification. Based on the spec,\nit will make another call to a nebula endpoint to get data about the dataset, which it will\nuse to set 'input.path', 'input.properties', 'directives', and 'output.schema' arguments\nthat can be used later on in the pipeline through macros.", + "Icon": "N/A" + }, + "SFTPCopy": { + "Name": "SFTPCopy", + "Display Name": "SFTPCopy", + "Type": "Action", + "Icon": "N/A", + "Description": null + }, + "SFTPDelete": { + "Name": "SFTPDelete", + "Display Name": "SFTPDelete", + "Type": "Action", + "Icon": "N/A", + "Description": null + }, + "SFTPPut": { + "Name": "SFTPPut", + "Display Name": "SFTPPut", + "Type": "Action", + "Icon": "N/A", + "Description": null + }, + "AzureBlobStore": { + "Name": "AzureBlobStore", + "Display Name": "AzureBlobStore", + "Type": "Source", + "Icon": "N/A", + "Description": "Batch source to use Microsoft Azure Blob Storage as a source." + }, + "ADLSBatchSink": { + "Name": "ADLSBatchSink", + "Display Name": "ADLSBatchSink", + "Type": "Sink", + "Icon": "", + "Description": "Azure Data Lake Store Batch Sink writes data to Azure Data Lake Store directory in avro, orc or text format." + }, + "ADLSDelete": { + "Name": "ADLSDelete", + "Display Name": "ADLSDelete", + "Type": "Action", + "Icon": "", + "Description": "Deletes a file or files within ADLS file system." + }, + "AzureDataLakeStore": { + "Name": "AzureDataLakeStore", + "Display Name": "AzureDataLakeStore", + "Type": "Source", + "Icon": "", + "Description": "Azure Data Lake Store Batch Source reads data from Azure Data Lake Store files and converts it into \nStructuredRecord." + }, + "LogisticRegressionClassifier": { + "Name": "LogisticRegressionClassifier", + "Display Name": "LogisticRegressionClassifier", + "Type": "Analytics", + "Icon": "N/A", + "Description": "Loads a Logistic Regression model from a file of a FileSet dataset and uses it to classify records." 
+ }, + "LogisticRegressionTrainer": { + "Name": "LogisticRegressionTrainer", + "Display Name": "LogisticRegressionTrainer", + "Type": "Sink", + "Icon": "N/A", + "Description": "Trains a classification model based upon a particular label and features of a record. Saves this model to a FileSet." + }, + "AzureDelete": { + "Name": "AzureDelete", + "Display Name": "AzureDelete", + "Type": "Action", + "Icon": "", + "Description": "Azure Delete Action plugin deletes a container on Azure Storage Blob service. " + }, + "VerticaBulkImportAction": { + "Name": "VerticaBulkImportAction", + "Display Name": "VerticaBulkImportAction", + "Type": "Action", + "Icon": "N/A", + "Description": "Vertica Bulk Import Action plugin gets executed after successful mapreduce or spark job. It reads all the files in a given directory and bulk imports contents of those files into vertica table. " + }, + "MapRDBJSON": { + "Name": "MapRDBJSON", + "Display Name": "MapR-DB JSON Table", + "Type": "Sink", + "Description": "MapR-DB JSON table sink is used to write the JSON documents to the MapR-DB table.", + "Icon": "" + }, + "MapRStream": { + "Name": "MapRStream", + "Display Name": "MapR Stream Consumer", + "Type": "Source", + "Description": "MapR streaming source. Reads events from MapR stream.", + "Icon": "" + }, + "FTPCopy": { + "Name": "FTPCopy", + "Display Name": "FTPCopy", + "Type": "Action", + "Icon": "N/A", + "Description": "Copy files from FTP server to the specified destination." + }, + "RecordSplitter": { + "Name": "RecordSplitter", + "Display Name": "RecordSplitter", + "Type": "Transform", + "Icon": "N/A", + "Description": "Given a field and a delimiter, emits one record for each split of the field." + }, + "FileContents": { + "Name": "FileContents", + "Display Name": "File Contents Checker", + "Type": "Action", + "Description": "This action plugin can be used to check if a file is empty or if the\ncontents of a file match a given pattern.", + "Icon": "" + }, + "HTTPToHDFS": { + "Name": "HTTPToHDFS", + "Display Name": "HTTPToHDFS", + "Type": "Action", + "Icon": "N/A", + "Description": "Action to fetch data from an external http endpoint and create a file in HDFS." + }, + "OracleExport": { + "Name": "OracleExport", + "Display Name": "OracleExport", + "Type": "Action", + "Icon": "N/A", + "Description": "A Hydrator Action plugin to efficiently export data from Oracle to HDFS or local file system. The plugin uses\nOracle's command line tools to export data. The data exported from this tool can then be used in Hydrator pipelines." + }, + "AzureEventHub": { + "Name": "AzureEventHub", + "Display Name": "AzureEventHub", + "Type": "Source", + "Icon": "N/A", + "Description": "Azure Event Hub streaming source. Emits a record with the schema specified by the user. If no schema\nis specified, it will emit a record with 'message'(bytes)." + }, "CDCBigTable": { "Name": "CDCBigTable", "Display Name": "CDC Google Cloud Bigtable Sink", "Type": "Sink", "Description": "This plugin takes input from a CDC source and writes the changes to Cloud Bigtable.", "Icon": "" + }, + "ToUTF8": { + "Name": "ToUTF8", + "Display Name": "ToUTF8", + "Type": "Action", + "Icon": "N/A", + "Description": null + }, + "DateTransform": { + "Name": "DateTransform", + "Display Name": "DateTransform", + "Type": "Transform", + "Icon": "N/A", + "Description": "This transform takes a date in either a unix timestamp or a string, and converts it to a formatted string. 
(Macro-enabled)" + }, + "FailPipeline": { + "Name": "FailPipeline", + "Display Name": "Fail Pipeline", + "Type": "Sink", + "Description": "Batch sink used to fail the running pipeline as soon as any record flows into this sink;\nthe pipeline fails upon receiving the first record.", + "Icon": "" + }, + "HTTP": { + "Name": "HTTP", + "Display Name": "HTTP", + "Type": "Sink", + "Icon": "N/A", + "Description": "Sink plugin that sends messages from the pipeline to an external HTTP endpoint." + }, + "DynamicMultiADLS": { + "Name": "DynamicMultiADLS", + "Display Name": "ADLS Batch Sink", + "Type": "Sink", + "Icon": "" + }, + "DynamicMultiFileset": { + "Name": "DynamicMultiFileset", + "Display Name": "DynamicMultiFileset", + "Type": "Sink", + "Icon": "", + "Description": "This plugin is normally used in conjunction with the MultiTableDatabase batch source to write records from multiple\ndatabases into multiple filesets in text format. Each fileset it writes to will contain a single 'ingesttime' partition,\nwhich will contain the logical start time of the pipeline run. The plugin expects that the filesets it needs to write\nto will be set as pipeline arguments, where the key is 'multisink.[fileset]' and the value is the fileset schema.\nNormally, you rely on the MultiTableDatabase source to set those pipeline arguments, but they can also be manually\nset or set by an Action plugin in your pipeline. The sink will expect each record to contain a special split field\nthat will be used to determine which records are written to each fileset. For example, suppose the\nsplit field is 'tablename'. A record whose 'tablename' field is set to 'activity' will be written to the 'activity'\nfileset." + }, + "MultiTableDatabase": { + "Name": "MultiTableDatabase", + "Display Name": "Multiple Database Tables", + "Type": "Source", + "Description": "Reads from multiple tables within a database using JDBC. Often used in conjunction with the DynamicMultiFileset sink\nto perform dumps from multiple tables to HDFS files in a single pipeline. The source will output a record for each\nrow in the tables it reads, with each record containing an additional field that holds the name of the table the\nrecord came from. In addition, for each table that will be read, this plugin will set pipeline arguments where the\nkey is 'multisink.[tablename]' and the value is the schema of the table. This is to make it work with the\nDynamicMultiFileset.", + "Icon": "" + }, + "AddField": { + "Name": "AddField", + "Display Name": "AddField", + "Type": "Transform", + "Icon": "N/A", + "Description": "Adds a new field to each record. The field value can either be a new UUID, or it can be set directly through\nconfiguration. This transform is used when you want to add a unique id field to each record, or when you want\nto tag each record with some constant value. For example, you may want to add the logical start time as a field\nto each record." + }, + "MultiFieldAdder": { + "Name": "MultiFieldAdder", + "Display Name": "MultiFieldAdder", + "Type": "Transform", + "Icon": "N/A", + "Description": "Multi Field Adder Transform allows you to add one or more fields to the output.\nEach field specified has a name and a value. The value is currently set to\nbe of type string."
+ }, + "AzureFaceExtractor": { + "Name": "AzureFaceExtractor", + "Display Name": "AzureFaceExtractor", + "Type": "Transform", + "Icon": "N/A", + "Description": null + }, + "GCSFileBlob": { + "Name": "GCSFileBlob", + "Display Name": "Google Cloud Storage File Blob", + "Type": "Source", + "Icon": "" + }, + "GCSParquet": { + "Name": "GCSParquet", + "Display Name": "GCSParquet", + "Type": "Sink", + "Icon": "", + "Description": "A GCS sink to write records as AVRO records into Parquet files." + }, + "SpeechTranslator": { + "Name": "SpeechTranslator", + "Display Name": "SpeechTranslator", + "Type": "Transform", + "Icon": "" + }, + "GCSAvro": { + "Name": "GCSAvro", + "Display Name": "GCSAvro", + "Type": "Sink", + "Icon": "", + "Description": "A GCS sink to write records as AVRO records." + }, + "GCSText": { + "Name": "GCSText", + "Display Name": "GCSText", + "Type": "Sink", + "Icon": "", + "Description": "A GCS sink to write records as Comma-, Tab-, Pipe-, or CTRL+A-separated text files, or as JSON\ntext files." + }, + "S3Avro": { + "Name": "S3Avro", + "Display Name": "S3Avro", + "Type": "Sink", + "Icon": "", + "Description": "A batch sink for writing to Amazon S3 in Avro format." + }, + "S3Parquet": { + "Name": "S3Parquet", + "Display Name": "S3Parquet", + "Type": "Sink", + "Icon": "", + "Description": "A batch sink to write to S3 in Parquet format." + }, + "MainframeReader": { + "Name": "MainframeReader", + "Display Name": "MainframeReader", + "Type": "Source", + "Icon": "N/A", + "Description": "This is a source plugin that allows users to read and process mainframe files." + }, + "KinesisSink": { + "Name": "KinesisSink", + "Display Name": "KinesisSink", + "Type": "Sink", + "Icon": "N/A", + "Description": "Kinesis sink that outputs to a specified Amazon Kinesis Stream." + }, + "ScalaSparkProgram": { + "Name": "ScalaSparkProgram", + "Display Name": "ScalaSparkProgram", + "Type": "Action", + "Description": "Executes user-provided Spark code in Scala.", + "Icon": "N/A" + }, + "PySparkProgram": { + "Name": "PySparkProgram", + "Display Name": "PySpark Program", + "Type": "Action", + "Description": "Executes user-provided Spark code in Python.", + "Icon": "" + }, + "ScalaSparkCompute": { + "Name": "ScalaSparkCompute", + "Display Name": "ScalaSparkCompute", + "Type": "Analytics", + "Icon": "N/A", + "Description": "Executes user-provided Spark code in Scala that transforms an RDD to an RDD with full\naccess to all Spark features." + }, + "ScalaSparkSink": { + "Name": "ScalaSparkSink", + "Display Name": "ScalaSparkSink", + "Type": "Sink", + "Icon": "N/A", + "Description": "Executes user-provided Spark code in Scala that operates on an input RDD or Dataframe with full\naccess to all Spark features." + }, + "Trash": { + "Name": "Trash", + "Display Name": "Trash", + "Type": "Sink", + "Icon": "N/A", + "Description": "Trash consumes all records on the input and discards them;\nno output is generated or stored anywhere." + }, + "HiveExport": { + "Name": "HiveExport", + "Display Name": "Hive Bulk Export", + "Type": "Action", + "Description": null, + "Icon": "" + }, + "HiveImport": { + "Name": "HiveImport", + "Display Name": "Hive Bulk Import", + "Type": "Action", + "Description": null, + "Icon": "" + }, + "OrientDB": { + "Name": "OrientDB", + "Display Name": "OrientDB", + "Type": "Sink", + "Icon": "N/A", + "Description": "Writes data to an OrientDB database."
+ }, + "FastFilter": { + "Name": "FastFilter", + "Display Name": "FastFilter", + "Type": "Transform", + "Icon": "N/A", + "Description": "Filters out messages based on specified criteria." + }, + "AzureDecompress": { + "Name": "AzureDecompress", + "Display Name": "AzureDecompress", + "Type": "Action", + "Icon": "", + "Description": "Azure Decompress Action plugin decompresses gz files from a container on Azure Storage Blob service into another container." + }, + "DynamoDB": { + "Name": "DynamoDB", + "Display Name": "DynamoDB", + "Type": "Source", + "Icon": "", + "Description": "DynamoDB Batch Source that reads data items from an AWS DynamoDB table and converts each item into a\nStructuredRecord as per the schema specified by the user, so that it can be further processed downstream in the pipeline.\nThe user can provide a query to read the items from the DynamoDB table." + }, + "TopN": { + "Name": "TopN", + "Display Name": "TopN", + "Type": "Analytics", + "Icon": "N/A", + "Description": "Top-N returns the top \"n\" records from the input set, based on the criteria specified in the plugin configuration." } -} +} \ No newline at end of file