Remotely Debug Code Running in the Spark Shell
# Attach a remote debugger: the JVM opens a JDWP socket on port 9999 and,
# because suspend=y, waits for the debugger to connect before starting.
export SPARK_SUBMIT_OPTS="-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=9999"
./spark-shell
Change the Spark web UI port (spark.ui.port)
# Run the shell with the web UI bound to a non-default port (default is 4040).
./spark-shell --conf spark.ui.port=12345
Disable/Change Log Level in Spark Shell
// Paste inside spark-shell: silence the "org" (Spark) and "akka" logger
// hierarchies via log4j so the REPL output stays readable.
import org.apache.log4j.Logger
import org.apache.log4j.Level
Logger.getLogger("org").setLevel(Level.OFF)
Logger.getLogger("akka").setLevel(Level.OFF)
Spark Event Logs and History Server
# Save event data so that past runs can be replayed and compared later.
# Enable event logging (set these in conf/spark-defaults.conf) so completed
# applications can be inspected in the History Server UI.
spark.eventLog.enabled=true
spark.eventLog.dir=/tmp/spark-events
# Start the History Server from the Spark installation directory.
# NOTE(review): passing the log directory as an argument is deprecated;
# prefer setting spark.history.fs.logDirectory — confirm against your version.
"$SPARK_HOME"/sbin/start-history-server.sh /tmp/spark-events
Examples
// Word count: in spark-shell the SparkContext is bound to `sc`
// (the SparkSession binding `spark` has no textFile method — that
// would be spark.read.textFile, which returns a Dataset, not an RDD).
val textFile = sc.textFile("hdfs://...")
val counts = textFile.flatMap(line => line.split(" "))
  .map(word => (word, 1))
  .reduceByKey(_ + _)
counts.saveAsTextFile("hdfs://...")
# Attach a remote debugger: the JVM opens a JDWP socket on port 9999 and,
# because suspend=y, waits for the debugger to connect before starting.
export SPARK_SUBMIT_OPTS="-agentlib:jdwp=transport=dt_socket,server=y,suspend=y,address=9999"
./spark-shell
Change the Spark web UI port (spark.ui.port)
# Run the shell with the web UI bound to a non-default port (default is 4040).
./spark-shell --conf spark.ui.port=12345
Disable/Change Log Level in Spark Shell
// Paste inside spark-shell: silence the "org" (Spark) and "akka" logger
// hierarchies via log4j so the REPL output stays readable.
import org.apache.log4j.Logger
import org.apache.log4j.Level
Logger.getLogger("org").setLevel(Level.OFF)
Logger.getLogger("akka").setLevel(Level.OFF)
Spark Event Logs and History Server
# Save event data so that past runs can be replayed and compared later.
# Enable event logging (set these in conf/spark-defaults.conf) so completed
# applications can be inspected in the History Server UI.
spark.eventLog.enabled=true
spark.eventLog.dir=/tmp/spark-events
# Start the History Server from the Spark installation directory.
# NOTE(review): passing the log directory as an argument is deprecated;
# prefer setting spark.history.fs.logDirectory — confirm against your version.
"$SPARK_HOME"/sbin/start-history-server.sh /tmp/spark-events
Examples
// Word count: in spark-shell the SparkContext is bound to `sc`
// (the SparkSession binding `spark` has no textFile method — that
// would be spark.read.textFile, which returns a Dataset, not an RDD).
val textFile = sc.textFile("hdfs://...")
val counts = textFile.flatMap(line => line.split(" "))
  .map(word => (word, 1))
  .reduceByKey(_ + _)
counts.saveAsTextFile("hdfs://...")