r - Find a block of steady column values -
5can give me hint speed following program? situation: have huge amount of measurement data. need extract data "10 minutes stable operation conditions" of 5 parameters i.e. column values.
here (working, slow) solution: - take first 10 rows dataframe - compare min , max of each column first value of column - if @ least 1 column min or max not within tolerance, delete first row, repeat - if within tolerance, calculate mean of results, store them, delete 10 rows, repeat. - break when dataframe has less 10 rows
since using repeat loop, takes 30min extract 610 operation points 86.220 minutes of data.
any appreciated. thanks!
edit: created code explain. please note deleted checking routines na values , standby operation (values around 0):
n_cons<-5 # number of consistent minutes? ### function check wheter value within tolerance f_cons<-function(min,max,value,tol){ z<-max > (value + tol) | min < (value - tol); return(z) } # define +/- tolerances vu_1_tol<-5 # f_ht vu_2_tol<-5 # f_lt # create empty result map map<-c(rep(na,3)) dim(map)<- c(1,3) colnames(map)<-list("f_ht","f_lt","result") system.time( repeat{ # criteria break if(nrow(t6)<n_cons){break} # subset of data check t_check<-null t_check<-cbind(t6$f_ht[1:n_cons], t6$f_lt[1:n_cons] ) # check consistency if(f_cons(min(t_check[,1]),max(t_check[,1]),t_check[1,1],vu_1_tol)){t6<-t6[-1,] next} if(f_cons(min(t_check[,2]),max(t_check[,2]),t_check[1,2],vu_2_tol)){t6<-t6[-1,] next} # if repeat loop passes consistency check, store means attach(t6[1:n_cons,]) # create new row wih means of steady block new_row<-c(mean(f_ht),mean(f_lt),mean(result)) new_row[-1]<-round(as.numeric(new_row[-1]),2) map<-rbind(map,new_row) # attach new steady point map detach(t6[1:n_cons,]) t6<-t6[-(1:n_cons),] # delete evaluated lines data } )
the data using looks this
t6<-structure(list(f_ht = c(1499.71, 1500.68, 1500.44, 1500.19, 1500.31, 1501.76, 1501, 1551.22, 1500.01, 1500.52, 1499.53, 1500.78, 1500.65, 1500.96, 1500.25, 1500.76, 1499.49, 1500.24, 1500.47, 1500.25, 1735.32, 2170.53, 2236.08, 2247.48, 2250.71, 2249.59, 2246.68, 2246.69, 2248.27, 2247.79), f_lt = c(2498.96, 2499.93, 2499.73, 2494.57, 2496.94, 2507.71, 2495.67, 2497.88, 2499.63, 2506.18, 2495.57, 2504.28, 2497.38, 2498.66, 2502.17, 2497.78, 2498.38, 2501.06, 2497.75, 2501.32, 2500.79, 2498.17, 2494.82, 2499.96, 2498.5, 2503.47, 2500.57, 2501.27, 2501.17, 2502.33), result = c(9125.5, 8891.5, 8624, 8987, 9057.5, 8840.5, 9182, 8755.5, 9222.5, 9079, 9175.5, 9458.5, 9058, 9043, 9045, 9309, 9085.5, 9230, 9346, 9234, 9636.5, 9217.5, 9732.5, 9452, 9358, 9071.5, 9063.5, 9016.5, 8591, 8447.5)), .names = c("f_ht", "f_lt", "result"), row.names = 85777:85806, class = "data.frame")
with code , data, 3 steady operation points, want, slow.
hopefully, helps better explain problem.
heureka! comment of carl witthoft, able speed proces factor 15! used rollapply lot, because rollmean , rollmax had problems na did not occur when using rollaply. help!
here did used same data before:
# use values needed check stability t7<-as.data.frame(cbind(t6$f_ht,t6$f_lt)) n_cons<-5 # number of consistent minutes? # calculate mean values each column on 5 rows t7_rm<-rollapply(t7,n_cons,mean,align = "left") colnames(t7_rm)<-c("mean_f_ht","mean_f_lt") # idem maximum t7_max<-rollapply(t7,width=n_cons,fun=max, na.rm = f,align = "left") colnames(t7_max)<-c("max_f_ht","max_f_lt") # idem minimum t7_min<-rollapply(t7,width=n_cons,fun=min, na.rm = f,align = "left") colnames(t7_min)<-c("min_f_ht","min_f_lt") # create table maximum absolute daviation mean values t7_dif<-pmax((t7_max-t7_rm[1:nrow(t7_max),]),(t7_rm[1:nrow(t7_min),]-t7_min)) colnames(t7_dif)<-c("diff_f_ht","diff_f_lt") # enter tolerance limits v1_tol<-50 # f_ht v2_tol<-50 # f_lt # create tolerance table t7_tol<-cbind(rep(v1_tol,nrow(t7_dif)),rep(v2_tol,nrow(t7_dif))) # create logical table true or false depending on if max deviation within tolerance t7_check<-(t7_dif<t7_tol) # replace "false" "na" (in order use is.na) t7_check_na<-apply(t7_check,c(1,2),function(x) {ifelse(x==false,na,x)}) # create rolling mean on complete data t6_rm<-rollapply(t6,n_cons,mean,na.rm=true,align = "left") # create map of stable operation points means of parameters , result t6_map<-t6_rm[complete.cases(t7_check_na),]
the result differs original one, because no lines omitted. works me.
Comments
Post a Comment