java - Writing a DF as a parquet with null values using Spark
I am trying to read a CSV using a StructType that has more struct fields than the CSV file has columns, which results in the extraneous columns being null. When I then try to write the file out as Parquet, I get java.lang.NumberFormatException: null (full stack trace and a sketch of the read below).
java.lang.NumberFormatException: null
  at java.lang.Long.parseLong(Long.java:404)
  at java.lang.Long.parseLong(Long.java:483)
  at scala.collection.immutable.StringLike$class.toLong(StringLike.scala:230)
  at scala.collection.immutable.StringOps.toLong(StringOps.scala:31)
  at com.databricks.spark.csv.util.TypeCast$.castTo(TypeCast.scala:54)
  at com.databricks.spark.csv.CsvRelation$$anonfun$buildScan$6.apply(CsvRelation.scala:181)
  at com.databricks.spark.csv.CsvRelation$$anonfun$buildScan$6.apply(CsvRelation.scala:162)
  at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
  at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
  at scala.collection.Iterator$$anon$11.hasNext(Iterator.scala:327)
  at scala.collection.Iterator$$anon$13.hasNext(Iterator.scala:371)
  at org.apache.spark.sql.execution.datasources.DynamicPartitionWriterContainer.writeRows(WriterContainer.scala:349)
  at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1$$anonfun$apply$mcV$sp$3.apply(InsertIntoHadoopFsRelation.scala:150)
  at org.apache.spark.sql.execution.datasources.InsertIntoHadoopFsRelation$$anonfun$run$1$$anonfun$apply$mcV$sp$3.apply(InsertIntoHadoopFsRelation.scala:150)
  at org.apache.spark.scheduler.ResultTask.runTask(ResultTask.scala:66)
  at org.apache.spark.scheduler.Task.run(Task.scala:89)
  at org.apache.spark.executor.Executor$TaskRunner.run(Executor.scala:213)
  at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
  at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
  at java.lang.Thread.run(Thread.java:745)
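A minimal sketch of the kind of read being described, assuming Spark 1.x with the Databricks spark-csv package; the schema, field names, and path are illustrative, not taken from the question:

import org.apache.spark.sql.SQLContext
import org.apache.spark.sql.types.{LongType, StringType, StructField, StructType}

val sqlContext = new SQLContext(sc)  // sc: an existing SparkContext

// The schema deliberately declares more fields than the CSV has columns;
// the trailing fields come back null for every row.
val schema = StructType(Seq(
  StructField("id", LongType, nullable = true),
  StructField("name", StringType, nullable = true),
  StructField("extraCount", LongType, nullable = true)  // no matching CSV column
))

val dataFrameWithNulls = sqlContext.read
  .format("com.databricks.spark.csv")
  .schema(schema)
  .option("header", "true")
  .load("/path/to/input.csv")  // hypothetical path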
The write that triggers the exception:

dataFrameWithNulls.coalesce(4).write.option("mode", "permissive").mode("append").partitionBy("$day", "$hour").parquet(s"$writeDir/")
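The trace points at com.databricks.spark.csv.util.TypeCast.castTo rather than at the Parquet writer itself: the CSV read is lazy, so the failed Long cast only surfaces once the write forces a scan. (Note too that "mode" -> "permissive" is a spark-csv read option, so the Parquet writer presumably ignores it here.) One hedged workaround sketch, not a confirmed fix: read with a schema covering only the columns the file actually has, then add the missing fields as typed null columns before writing. csvOnlySchema and extraCount are hypothetical names:

import org.apache.spark.sql.functions.lit
import org.apache.spark.sql.types.LongType

// Read only the columns that exist in the file, so spark-csv never
// tries to cast a missing token to Long.
val narrow = sqlContext.read
  .format("com.databricks.spark.csv")
  .schema(csvOnlySchema)  // hypothetical: covers just the real columns
  .load("/path/to/input.csv")

// Add the extra fields as typed nulls; Parquet can store null columns.
val dataFrameWithNulls = narrow.withColumn("extraCount", lit(null).cast(LongType))

Alternatively, spark-csv 1.3+ exposes the nullValue and treatEmptyValuesAsNulls reader options, which may help when the failing tokens are empty strings rather than columns that are missing entirely.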