####################################################################### ## ## Name: plot_probri.R ## ## Description: ## ## Usage: ## Rscript plot_probri.R ## ## Arguments: ## ## Details: ## ## Examples: ## ## Author: ## ######################################################################## # Check that the MET_INSTALL_DIR environment variable is set MET_INSTALL_DIR = Sys.getenv("MET_INSTALL_DIR", unset=NA); if(is.na(MET_INSTALL_DIR)) { cat("ERROR: The \"MET_INSTALL_DIR\" environment variable must be set.\n"); quit(status=1); } # Make sure that tc_stat can be found #tc_stat = Sys.which("tc_stat"); tc_stat = ${MET_INSTALL_DIR}/bin/tc_stat"; if(nchar(tc_stat) == 0) { cat("ERROR: Cannot find the tc_stat tool in your path.\n"); quit(status=1); } ######################################################################## # # Usage statement. # ######################################################################## usage = function() { cat("\nUsage: plot_probri.R\n"); cat(" -lookin tcst_file_list\n"); cat(" [-outdir path]\n"); cat(" [-basin string]\n"); cat(" [-amodel string]\n"); cat(" [-by string]\n"); cat(" [-lead n]\n"); cat(" [-ri_window n]\n"); cat(" [-ri_thresh n]\n"); cat(" [-ri_bdelta_thresh n]\n"); cat(" [-bdelta_max]\n"); cat(" [-save]\n"); cat("\n"); } ######################################################################## # # read.grep in_file pattern # ######################################################################## read.grep = function(in_file, pattern) { tmp_file = "/tmp/plot_probri_read_grep"; run_cmd = paste("grep", pattern, in_file, ">", tmp_file); system(run_cmd); d = read.table(tmp_file, header=TRUE); system(paste("rm -f", tmp_file)); return(d); } ######################################################################## # # check.thresh(d, thr_typ, thr_val) # ######################################################################## check.thresh = function(d, typ, val) { if(typ == "lt" || typ == "<" ) { return(d < val); } else if(typ == "le" || typ == "<=") { return(d <= val); } else if(typ == "eq" || typ == "==") { return(d == val); } else if(typ == "ne" || typ == "!=") { return(d != val); } else if(typ == "ge" || typ == ">=") { return(d >= val); } else if(typ == "gt" || typ == ">" ) { return(d > val); } else { cat("ERROR: unexpected threshold type:", typ, "\n"); quit("no"); } } ######################################################################## # # build case information # ######################################################################## define.case = function(d) { if(nchar(by) > 0) { for(col in unlist(strsplit(toupper(by), ','))) { if(is.null(d$CASE)) { d$CASE = d[,col]; } else { d$CASE = paste(d$CASE, "_", d[,col], sep=''); } } } else { d$CASE = "ALL"; } return(d$CASE); } ######################################################################## # # Constants. # ######################################################################## outdir = "."; tcst_dump_file = paste("/tmp/plot_probri_tcst_", Sys.getpid(), ".tcst", sep=''); tcst_out_file = paste("/tmp/plot_probri_tcst_", Sys.getpid(), ".out", sep=''); color_list = c("black", "red", "green", "blue", "purple", "orange", "black", "red", "green", "blue", "purple", "orange", "black", "red", "green", "blue", "purple", "orange"); lty_list = rep(c(1, 2, 4, 1, 2, 4, 2, 4, 1, 2, 4, 1, 4, 2, 1, 4, 2, 1), 100); # Defaults file_list = c(); basin = ""; amodel = ""; by = ""; lead = 24; ri_window = 24; ri_thresh = 30; ri_bdelta_thresh = "ge30"; ri_exact = TRUE; save = FALSE; ######################################################################## # # Handle the arguments. # ######################################################################## # Retreive the arguments args = commandArgs(TRUE); # Check the number of arguments if(length(args) < 2) { usage(); quit(); } # Parse optional arguments i=1; while(i <= length(args)) { if(args[i] == "-lookin") { while(i+1 <= length(args) & substring(args[i+1], 1, 1) != '-') { file_list = c(file_list, args[i+1]); i=i+1; } } else if(args[i] == "-outdir") { outdir = args[i+1]; i=i+1; } else if(args[i] == "-basin") { basin = args[i+1]; i=i+1; } else if(args[i] == "-amodel") { amodel = args[i+1]; i=i+1; } else if(args[i] == "-by") { by = args[i+1]; i=i+1; } else if(args[i] == "-lead") { lead = args[i+1]; i=i+1; } else if(args[i] == "-ri_window") { ri_window = args[i+1]; i=i+1; } else if(args[i] == "-ri_thresh") { ri_thresh = args[i+1]; i=i+1; } else if(args[i] == "-ri_bdelta_thresh") { ri_bdelta_thresh = args[i+1]; i=i+1; } else if(args[i] == "-bdelta_max") { ri_exact = FALSE; } else if(args[i] == "-save") { save = TRUE; } else { cat("ERROR: Unrecognized command line argument:", args[i], "\n"); usage(); quit(); } # Increment count i=i+1; } # end while # Check for empty input file list if(length(file_list) == 0) { cat("ERROR: Must specify input data files using the -lookin option.\n"); quit(status=1); } # Expand any wildcards in the input file list file_list = system(paste("ls -1", paste(file_list, collapse=" ")), intern=TRUE); ######################################################################## # # Run a tc_stat filter job to subset the data. # ######################################################################## # Build tc_stat command # Toss out BDECK Land Points run_cmd = paste(tc_stat, paste(" -lookin", file_list, collapse=' '), " -job probri", " -out_line_type PCT,PSTD,PRC,PJC", " -column_thresh BDLAND gt0", " -column_thresh RI_WINDOW ==", ri_window, " -lead ", lead, " -probri_thresh ", ri_thresh, " -probri_bdelta_thresh ", ri_bdelta_thresh, " -probri_exact ", ri_exact, " -dump_row ", tcst_dump_file, " -out ", tcst_out_file, " -v 3", sep=''); # Basin option if(nchar(basin) > 0) { run_cmd = paste(run_cmd, "-basin", basin); basin_str = basin; } else { basin_str = "ALL"; } # AModel option if(nchar(amodel) > 0) { run_cmd = paste(run_cmd, "-amodel", amodel); amodel_str = amodel; } else { amodel_str = "ALL"; } # By option if(nchar(by) > 0) { run_cmd = paste(run_cmd, "-by", by); } # Run the tc_stat command and check the return status cat("CALLING:", run_cmd, "\n"); status = system(run_cmd); if(status != 0) { cat("ERROR: Bad return value ", status , "\n"); quit(status=status); } # Parse ri_bdelta_thresh to extract hi and lo thresholds thr1_typ = thr2_typ = thr1_val = thr2_val = NA; thr_tok = strsplit(ri_bdelta_thresh, "[&,\\]")[[1]]; for(i in 1:length(thr_tok)) { if(nchar(thr_tok[i]) == 0) { next; } if(is.na(thr1_typ)) { thr1_typ = gsub('-|0|1|2|3|4|5|6|7|8|9', '', thr_tok[i]); thr1_val = gsub('lt|le|gt|ge', '', thr_tok[i]); } else { thr2_typ = gsub('0|1|2|3|4|5|6|7|8|9', '', thr_tok[i]); thr2_val = gsub('lt|le|gt|ge', '', thr_tok[i]); } } # Reformat ri_bdelta_thresh old_str = c("\\", "lt", "le", "eq", "ne", "ge", "gt", "&&", "||"); new_str = c( "", "<", "<=", "==", "!=", ">=", ">", " and ", " or "); bdelta_thresh_str = ri_bdelta_thresh; for(i in 1:length(old_str)) { bdelta_thresh_str = gsub(old_str[i], new_str[i], bdelta_thresh_str, fixed=TRUE); } # Read the TCST data allowing for ragged arrays cat("Reading PROBRI data:", tcst_dump_file, "\n"); tcst = read.table(tcst_dump_file, header=TRUE, fill=TRUE); # Select probability value from each line tcst$PROB = NA; for(i in 1:dim(tcst)[1]) { n = tcst$N_THRESH[i]; for(j in 1:n) { thr_col = paste("THRESH_", j, sep=""); # Check that the threshold column exists if(!(thr_col %in% colnames(tcst))) { cat("ERROR: Column name \"", thr_col, "\" not found for line number ", i, " of TCST file: ", tcst_dump_file, "\n", sep=""); quit(status=1); } # Look for a match if(ri_thresh == tcst[[thr_col]][i]) { tcst$PROB[i] = tcst[[paste("PROB_", j, sep="")]][i]; break; } } } # Apply thresholds to determine events if(is.na(thr2_typ)) { # Apply single threshold ind = check.thresh(tcst$BDELTA, thr1_typ, thr1_val); } else { # Apply two thresholds ind = check.thresh(tcst$BDELTA, thr1_typ, thr1_val) & check.thresh(tcst$BDELTA, thr1_typ, thr1_val); } tcst$EVENT = NA; tcst$EVENT[ind] = "Y" tcst$EVENT[!ind] = "N"; cat("Reading PROBRI output:", tcst_out_file, "\n"); pct = read.grep(tcst_out_file, "PROBRI_PCT"); pstd = read.grep(tcst_out_file, "PROBRI_PSTD"); prc = read.grep(tcst_out_file, "PROBRI_PRC"); pjc = read.grep(tcst_out_file, "PROBRI_PJC"); ######################################################################## # # Preprocess the tcst data # ######################################################################## # Format time strings tcst$INIT_TIME = as.POSIXct(strptime(tcst$INIT, format="%Y%m%d_%H%M%s")); tcst$VALID_TIME = as.POSIXct(strptime(tcst$VALID, format="%Y%m%d_%H%M%s")); tcst$LEAD_HR = tcst$LEAD/10000; # Handle BDELTA_MAX if(ri_exact) { BDelta_str = "BDELTA"; } else { tcst$BDELTA = tcst$BDELTA_MAX; BDelta_str = "BDELTA_MAX"; } subtitle = paste(ri_thresh, "kt Change Probability ", BDelta_str, " ", bdelta_thresh_str, ", ", length(tcst$PROB), " probs (", sum(tcst$PROB>0), " non-zero), ", "Basin: ", basin_str,", Model: ", amodel_str, ", Lead: ", lead, sep=''); out_base = paste(outdir, "/PROBRI_", basin_str, "_", amodel_str, "_F", lead, "_RI", ri_window, "_thresh", ri_thresh, sep=''); out_base = gsub(",", "_", out_base); library(verification); library(graphics); # Append the non-zero probability counts tcst$CASE = define.case(tcst); pstd$CASE = define.case(pstd); pstd$NONZERO = NA; for(i in 1:dim(pstd)[1]) { ind = tcst$CASE == pstd$CASE[i]; pstd$NONZERO[i] = sum(tcst$PROB[ind]>0); } # Write the PSTD stats out out_file = paste(out_base, "_pstd.txt", sep=''); print(paste("Writing:", out_file)); write.table(pstd, out_file, quote=FALSE, row.names=FALSE); # Open up the output device out_file = paste(out_base, ".pdf", sep=''); print(paste("Writing:", out_file)); pdf(out_file, height=8.5, width=11, useDingbats=FALSE); ######################################################################## # # Create histogram of probability values # ######################################################################## brate = sum(tcst$EVENT == "Y") / length(tcst$EVENT); # Open the output device cat(paste("Creating image file:", out_file, "\n")); hist(tcst$PROB, breaks=seq(0,100,1), freq=FALSE, main=paste("Histogram of", ri_window, "hr", ri_thresh, "kt Change Probabilities\n", BDelta_str, bdelta_thresh_str, "Rate =", round(brate, 4)), xlab="Probability (%)", sub=subtitle); ######################################################################## # # Create discrimination plot # ######################################################################## cat("Plotting discrimination...\n"); discrimination.plot(tcst$EVENT, tcst$PROB, main=paste(ri_window, "hr", ri_thresh, "kt Change Probability Discrimination\n", subtitle, sep=''), xlab="Probability (%)"); ######################################################################## # # Create scatter plot # ######################################################################## cat("Plotting scatter...\n"); # Colors for plotting points colors <- rgb(0, 0, 1, 0.25) plot(tcst$PROB, tcst$BDELTA, col=colors, pch=19, main=paste(ri_window, "hr", ri_thresh, "kt Change Probability vs BDECK Change"), ylab=paste(BDelta_str, "(kts)"), xlab="Probability (%)", sub=subtitle); # Draw reference lines for each threshold if(!is.na(thr1_val)) abline(h=thr1_val, col="red", lwd=2.0); if(!is.na(thr2_val)) abline(h=thr2_val, col="red", lwd=2.0); ######################################################################## # # Create ROC curve. # ######################################################################## cat("Plotting ROC...\n"); plot(x=c(0,1), y=c(0,1), type="n", xlab="POFD", ylab="PODY", main=paste(ri_window, "hr RI Probability ROC"), sub=subtitle); # Add reference lines abline(h=0, col="gray", lwd=2); abline(v=0, col="gray", lwd=2); abline(0, 1, col="gray", lwd=2, lty=2); for(i in 1:dim(prc)[1]) { pody = unlist(c(1, prc[i, grep("PODY", names(prc))], 0)); pofd = unlist(c(1, prc[i, grep("POFD", names(prc))], 0)); lines(x=pofd, y=pody, col=color_list[i], lty=lty_list[i], lwd=2); } prc$CASE = define.case(prc); par(bg="white"); legend("bottomright", paste(as.character(prc$CASE), ": Area = ", round(pstd$ROC_AUC, 4), ", Reli = ", round(pstd$RELIABILITY, 4), ", Brier = ", round(pstd$BRIER, 4), sep=''), col=color_list, lty=lty_list, lwd=2); ######################################################################## # # Create Reliability Diagram. # ######################################################################## cat("Plotting reliability...\n"); # Probability mid-points probs = pct[1, grep("THRESH_", names(pct))]; y.i = as.numeric((probs[1:(length(probs)-1)] + probs[2:length(probs)])/2); # Observed rate = oy / (oy + on) oy = pct[, grep("OY_", names(pct))]; on = pct[, grep("ON_", names(pct))]; obar.i = as.matrix(oy / (oy + on)); # Probability forecast rate = (oy + on) / total total = pct[, "TOTAL"]; prob.y = as.matrix((oy + on) / total); if(nchar(by) > 0) { for(col in unlist(strsplit(toupper(by), ','))) { if(is.null(pct$CASE)) { pct$CASE = pct[,col]; } else { pct$CASE = paste(pct$CASE, "_", pct[,col], sep=''); } } } else { pct$CASE = "ALL"; } reliability.plot(y.i, t(obar.i), t(prob.y), titl=paste(ri_window, "hr RI Reliability Diagram\n", subtitle), legend.names=as.character(pct$CASE)); ######################################################################## # # Clean up. # ######################################################################## # Finished with the plots dev.off(); # List the completed output file print(paste("Finished:", out_file)); # Optionally, save all of the data to an .RData file if(save == TRUE) save.image(); # Dispose of the temporary files run_cmd = paste("rm -f", tcst_dump_file, tcst_out_file); cat("CALLING: ", run_cmd, "\n"); status = system(run_cmd);