#we download the data from https://tcga-data.nci.nih.gov/docs/publications/gbm_exp/ # for 202 samples type = read.table("E:/backup (2017.7.10)/from desktop/xiaoying/realdata/Broad202.txt", sep = "\t", header = TRUE, check.names = FALSE, stringsAsFactors = FALSE) type = read.table("E:/backup (2017.7.10)/from desktop/xiaoying/realdata/UNC202.txt", sep = "\t", header = TRUE, check.names = FALSE, stringsAsFactors = FALSE) #type = read.table("E:/backup (2017.7.10)/from desktop/xiaoying/realdata/LBL202.txt", sep = "\t", header = TRUE, #check.names = FALSE, stringsAsFactors = FALSE) dim(type) head(type) type[3217,] vc20 <- c("EGFR","PDGFRA","FGFR3","PIK3C2B","PIK3R1","PIK3R3","PIK3IP1","AKTIP","NFIB","CDKN3","CDK4","CDKN1A","CDKN2C","CCND2","CASP1","CASP4","RASGRP3", "RRAS","IDH1","FOXM1") indices <- which(type[,1] == "EGFR") indices <- which(type[,1] == vc20) gene20 <- matrix(0,202,20) dim(gene20) for (i in 1:20) { indices <- which(type[,1] == vc20[i]) gene20[,i] <- as.numeric(type[indices,][,2:203]) } gene20 #length(as.numeric(type[indices,][,2:202])) p = 20 n = 202 #center_colmeans <- function(x) { # xcenter = colMeans(x) # x - rep(xcenter, rep.int(nrow(x), ncol(x))) #} #subset.centered <- center_colmeans(gene20) x <- as.matrix(gene20) dim(x) cov(x) solve(cov(x)) max(solve(cov(x))) min(solve(cov(x))) color20 <- color[[1]] color20 ########## vc10 <- c("TP53","PTEN","NF1","EGFR","IDH1","PIK3R1","RB1","ERBB2","PIK3CA","PDGFRA") gene10 <- matrix(0,202,10) dim(gene10) for (i in 1:10) { indices <- which(type[,1] == vc10[i]) gene10[,i] <- as.numeric(type[indices,][,2:203]) } gene10 p = 10 n = 173 #center_colmeans <- function(x) { # xcenter = colMeans(x) # x - rep(xcenter, rep.int(nrow(x), ncol(x))) #} #subset.centered <- center_colmeans(gene10) x <- as.matrix(gene10) cov(x) solve(cov(x)) max(solve(cov(x))) min(solve(cov(x))) color10 <- color[[1]] color10 #################################### s11861 = read.table("E:/backup (2017.7.10)/from desktop/xiaoying/realdata/unifiedScaled.txt", sep = "\t", header = TRUE, check.names = FALSE, stringsAsFactors = FALSE) x1 = s11861 mydata <- matrix(0,dim(x1)[2],dim(x1)[1]) for(i in 1:n) { mydata[i,] = as.numeric(x1[,i]) } dim(mydata) n=202 p=50 set.seed(1000) ccr <- sample(1:11861,p,replace=F) ccr x <- mydata[,ccr] dim(x) str(x) cov(x) solve(cov(x)) ###################################### #x <- gene10 #indices <- which(type[,1] == "A2BP1") #indices #for 840 genes and 173 samples subtype = read.table("E:/backup (2017.7.10)/from desktop/xiaoying/realdata/TCGA_unified_CORE_ClaNC840.txt", sep = "\t", header = TRUE, check.names = FALSE, stringsAsFactors = FALSE) subtype = read.table("TCGA_unified_CORE_ClaNC840.txt", sep = "\t", header = TRUE, check.names = FALSE, stringsAsFactors = FALSE) getwd() dir() #setwd("E:/backup (2017.7.10)/from desktop/xiaoying/realdata/") dim(subtype) head(subtype) subtype[1:20,1:3] subtype[1,2:10] realdata <- subtype[c(-1),c(-1,-2)] dim(realdata) head(realdata) n = 173 p = 100 x1 <- realdata[1:840,1:n] dim(x1) #str(x1) mydata <- matrix(0,dim(x1)[2],dim(x1)[1]) for(i in 1:n) { mydata[i,] = as.numeric(x1[,i]) } set.seed(1000) ccr <- sample(1:840,p,replace=F) ccr subtype[2:840,1][569] vcrandom <- subtype[2:840,1][ccr] # the names of selected genes mydata[,569] subGSE <- mydata[,ccr] ## the subset data I am going to do the clustering x <- as.matrix(subGSE) x <- exp(x) # the real data is log (see verhaak 2010 p107) dim(x) str(x) cov(x) solve(cov(x)) v <- MBVB(solve(cov(x)),0.5) v[[1]] colorrandom <- color[[1]] colorrandom write.table(color[[1]], file="E:/backup (2017.7.10)/from desktop/xiaoying/simulations/gene100.txt", append=T, row.names=FALSE, col.names=FALSE, sep=" ",eol = "\n") colorrandom <- read.table("E:/backup (2017.7.10)/from desktop/xiaoying/simulations/gene100.txt", header=F) colorrandom <- as.matrix(colorrandom) write.table(color[[1]], file="gene100.txt", append=T, row.names=FALSE, col.names=FALSE, sep=" ",eol = "\n") colorrandom100 <- read.table("gene100.txt", header=F) colorrandom100 <- as.matrix(colorrandom100) #the end # define the filename filename <- "Sonar.csv" # load the CSV file from the local directory dataset <- read.csv("E:/backup (2017.7.10)/from desktop/xiaoying/realdata/realdata_sonar/sonar.all-data", header=FALSE) # preview the first 5 rows head(dataset) dim(dataset) dataset[,61] subset <- dataset[,1:60] head(subset) dim(subset) p <- dim(subset)[2] n <- dim(subset)[1] center_colmeans <- function(x) { xcenter = colMeans(x) x - rep(xcenter, rep.int(nrow(x), ncol(x))) } subset.centered <- center_colmeans(subset) x <- as.matrix(subset.centered) head(x) dim(x) cov(x) solve(cov(x)) write.table(best, file="realdata.txt", append=T, row.names=FALSE, col.names=FALSE, sep=" ",eol = "\n") realeatimated <- read.table("realdata.txt", header=F) realeatimated cov(subset) ek <- solve(cov(x)) max(ek) min(ek) source("http://bioconductor.org/biocLite.R") biocLite() install.packages("GSVAdata") #install.packages("scales") #library(scales) #subset.rescaled <- subset-min(subset)/(max(subset)-min(subset)) #head(subset.rescaled) ek <- solve(cov(subset.centered)) dim(ek) max(ek) min(ek)