From 9698707f070ee7f535c2e9ac64d690a337bc4a07 Mon Sep 17 00:00:00 2001
From: Domnic Poravanthattil
Date: Wed, 10 Jun 2020 12:57:50 -0700
Subject: [PATCH 1/3] CDAP-16222: Fix imports in dynamic spark plugin
 documentation.

Cherry-picking changes from develop to release/2.2.
---
 docs/ScalaSparkCompute-sparkcompute.md      | 6 +++---
 docs/ScalaSparkProgram-sparkprogram.md      | 4 ++--
 docs/ScalaSparkSink-sparksink.md            | 6 +++---
 widgets/ScalaSparkProgram-sparkprogram.json | 2 +-
 4 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/docs/ScalaSparkCompute-sparkcompute.md b/docs/ScalaSparkCompute-sparkcompute.md
index 3fedcb0..00daf07 100644
--- a/docs/ScalaSparkCompute-sparkcompute.md
+++ b/docs/ScalaSparkCompute-sparkcompute.md
@@ -47,9 +47,9 @@ and produces records of two fields, ``'word'`` and ``'count'``.
 The following imports are included automatically and are ready for the user code to use:
 
-    import co.cask.cdap.api.data.format._
-    import co.cask.cdap.api.data.schema._;
-    import co.cask.cdap.etl.api.batch._
+    import io.cdap.cdap.api.data.format._
+    import io.cdap.cdap.api.data.schema._;
+    import io.cdap.cdap.etl.api.batch._
     import org.apache.spark._
     import org.apache.spark.api.java._
     import org.apache.spark.rdd._

diff --git a/docs/ScalaSparkProgram-sparkprogram.md b/docs/ScalaSparkProgram-sparkprogram.md
index 3943820..c1c5cea 100644
--- a/docs/ScalaSparkProgram-sparkprogram.md
+++ b/docs/ScalaSparkProgram-sparkprogram.md
@@ -13,14 +13,14 @@ Properties
 **mainClass** The fully qualified class name for the Spark application. It must either be an
 ``object`` that has a ``main`` method define inside, with the method signature as
 ``def main(args: Array[String]): Unit``; or it is a class that extends from the CDAP
-``co.cask.cdap.api.spark.SparkMain`` trait that implements the ``run`` method, with the method signature as
+``io.cdap.cdap.api.spark.SparkMain`` trait that implements the ``run`` method, with the method signature as
 ``def run(implicit sec: SparkExecutionContext): Unit``
 
 **scalaCode** The self-contained Spark application written in Scala.
 For example, an application that reads from CDAP stream with name ``streamName``, performs
 a simple word count logic and logs the result can be written as:
 
-    import co.cask.cdap.api.spark._
+    import io.cdap.cdap.api.spark._
     import org.apache.spark._
     import org.slf4j._

diff --git a/docs/ScalaSparkSink-sparksink.md b/docs/ScalaSparkSink-sparksink.md
index 0593795..8ed9c07 100644
--- a/docs/ScalaSparkSink-sparksink.md
+++ b/docs/ScalaSparkSink-sparksink.md
@@ -45,9 +45,9 @@ This will perform a word count on the input field ``'body'``, then write out the
 The following imports are included automatically and are ready for the user code to use:
 
-    import co.cask.cdap.api.data.format._
-    import co.cask.cdap.api.data.schema._;
-    import co.cask.cdap.etl.api.batch._
+    import io.cdap.cdap.api.data.format._
+    import io.cdap.cdap.api.data.schema._;
+    import io.cdap.cdap.etl.api.batch._
     import org.apache.spark._
     import org.apache.spark.api.java._
     import org.apache.spark.rdd._

diff --git a/widgets/ScalaSparkProgram-sparkprogram.json b/widgets/ScalaSparkProgram-sparkprogram.json
index 6257c35..2fdf10d 100644
--- a/widgets/ScalaSparkProgram-sparkprogram.json
+++ b/widgets/ScalaSparkProgram-sparkprogram.json
@@ -19,7 +19,7 @@
       "label": "Scala",
       "name": "scalaCode",
       "widget-attributes": {
-        "default": "import co.cask.cdap.api.spark._\nimport org.apache.spark._\nimport org.slf4j._\n\nclass SparkProgram extends SparkMain {\n  import SparkProgram._\n\n  override def run(implicit sec: SparkExecutionContext): Unit = {\n    LOG.info(\"Spark Program Started\")\n\n    val sc = new SparkContext\n\n    LOG.info(\"Spark Program Completed\")\n  }\n}\n\nobject SparkProgram {\n  val LOG = LoggerFactory.getLogger(getClass())\n}"
+        "default": "import io.cdap.cdap.api.spark._\nimport org.apache.spark._\nimport org.slf4j._\n\nclass SparkProgram extends SparkMain {\n  import SparkProgram._\n\n  override def run(implicit sec: SparkExecutionContext): Unit = {\n    LOG.info(\"Spark Program Started\")\n\n    val sc = new SparkContext\n\n    LOG.info(\"Spark Program Completed\")\n  }\n}\n\nobject SparkProgram {\n  val LOG = LoggerFactory.getLogger(getClass())\n}"
       }
     },
     {

From 69b046492d3bfb6bd60b89c715506abc98dbdc0b Mon Sep 17 00:00:00 2001
From: Domnic Poravanthattil
Date: Wed, 10 Jun 2020 19:37:07 -0700
Subject: [PATCH 2/3] CDAP-16222: Bumping CDAP version to 6.1.3

---
 pom.xml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/pom.xml b/pom.xml
index 0103fe5..c8901b3 100644
--- a/pom.xml
+++ b/pom.xml
@@ -29,7 +29,7 @@
 
     system:cdap-data-pipeline[6.0.0-SNAPSHOT,7.0.0-SNAPSHOT)
     system:cdap-data-streams[6.0.0-SNAPSHOT,7.0.0-SNAPSHOT)
-    <cdap.version>6.0.0-SNAPSHOT</cdap.version>
+    <cdap.version>6.1.3-SNAPSHOT</cdap.version>
     2.1.3
     1.0.9
 

From c4a79967d5751d789797855007bdf219d1c17ce9 Mon Sep 17 00:00:00 2001
From: bajram-adapt
Date: Thu, 22 Oct 2020 12:31:09 -0500
Subject: [PATCH 3/3] Show new error message when output schema is not
 matching input

---
 pom.xml                                              | 10 ++++++++--
 .../cdap/plugin/spark/dynamic/ScalaSparkCompute.java |  8 ++++++++
 2 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/pom.xml b/pom.xml
index c8901b3..7315f15 100644
--- a/pom.xml
+++ b/pom.xml
@@ -22,16 +22,17 @@
   <groupId>io.cdap.plugin</groupId>
   <artifactId>dynamic-spark</artifactId>
-  <version>2.2.2</version>
+  <version>2.2.3-SNAPSHOT</version>
 
     system:cdap-data-pipeline[6.0.0-SNAPSHOT,7.0.0-SNAPSHOT)
     system:cdap-data-streams[6.0.0-SNAPSHOT,7.0.0-SNAPSHOT)
-    <cdap.version>6.1.3-SNAPSHOT</cdap.version>
+    <cdap.version>6.1.1</cdap.version>
     2.1.3
     1.0.9
+    <hydrator-common.version>2.4.0</hydrator-common.version>
 
   true
@@ -100,6 +101,11 @@
       <version>4.11</version>
       <scope>test</scope>
     </dependency>
+    <dependency>
+      <groupId>io.cdap.plugin</groupId>
+      <artifactId>hydrator-common</artifactId>
+      <version>${hydrator-common.version}</version>
+    </dependency>
     <dependency>
       <groupId>io.cdap.cdap</groupId>
       <artifactId>hydrator-test</artifactId>

diff --git a/src/main/java/io/cdap/plugin/spark/dynamic/ScalaSparkCompute.java b/src/main/java/io/cdap/plugin/spark/dynamic/ScalaSparkCompute.java
index c5de6d6..6d8c5a8 100644
--- a/src/main/java/io/cdap/plugin/spark/dynamic/ScalaSparkCompute.java
+++ b/src/main/java/io/cdap/plugin/spark/dynamic/ScalaSparkCompute.java
@@ -24,6 +24,7 @@
 import io.cdap.cdap.api.data.schema.Schema;
 import io.cdap.cdap.api.plugin.PluginConfig;
 import io.cdap.cdap.api.spark.sql.DataFrames;
+import io.cdap.cdap.etl.api.FailureCollector;
 import io.cdap.cdap.etl.api.PipelineConfigurer;
 import io.cdap.cdap.etl.api.StageConfigurer;
 import io.cdap.cdap.etl.api.batch.SparkCompute;
@@ -98,6 +99,13 @@ public JavaRDD<StructuredRecord> transform(SparkExecutionPluginContext context,
       // If there is no output schema configured, derive it from the DataFrame
       // Otherwise, assume the DataFrame has the correct schema already
       outputSchema = DataFrames.toSchema((DataType) invokeDataFrameMethod(result, "schema"));
+    } else {
+      Schema dataSchema = DataFrames.toSchema((DataType) invokeDataFrameMethod(result, "schema"));
+      if (!dataSchema.isCompatible(outputSchema)) {
+        FailureCollector collector = context.getFailureCollector();
+        collector.addFailure("Schema mismatch.", "Output schema is not matching input schema.");
+        collector.getOrThrowException();
+      }
     }
     //noinspection unchecked
     return ((JavaRDD<Row>) invokeDataFrameMethod(result, "toJavaRDD")).map(new RowToRecord(outputSchema));
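
A minimal, self-contained sketch of the ``Schema.isCompatible`` check that PATCH 3/3
adds, shown outside the plugin. The class name ``SchemaCheckSketch`` and both example
schemas are invented for illustration; ``Schema.recordOf``, ``Schema.Field.of``,
``Schema.of``, and ``isCompatible`` are the same ``io.cdap.cdap.api.data.schema.Schema``
APIs the patched ``transform`` method calls:

    import io.cdap.cdap.api.data.schema.Schema;

    public class SchemaCheckSketch {
      public static void main(String[] args) {
        // Stand-in for dataSchema: the schema derived from the DataFrame
        // returned by the user's Scala code.
        Schema dataSchema = Schema.recordOf("output",
            Schema.Field.of("word", Schema.of(Schema.Type.STRING)));

        // Stand-in for outputSchema: the schema the pipeline author configured
        // on the plugin; it expects a 'count' field the data does not provide.
        Schema outputSchema = Schema.recordOf("output",
            Schema.Field.of("word", Schema.of(Schema.Type.STRING)),
            Schema.Field.of("count", Schema.of(Schema.Type.LONG)));

        // With PATCH 3/3, an incompatible pair like this is reported through the
        // FailureCollector when transform runs, instead of surfacing later as a
        // less descriptive error during row-to-record conversion.
        System.out.println(dataSchema.isCompatible(outputSchema)); // expected: false
      }
    }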
