Save the madgraph to do brushing with iplots.

To illustrate the data, focus on Madison.

source("http://pages.stat.wisc.edu/~karlrohe/netsci/code/loadData.R")
## 
Read 4.5% of 14080052 rows
Read 10.1% of 14080052 rows
Read 16.5% of 14080052 rows
Read 23.1% of 14080052 rows
Read 36.4% of 14080052 rows
Read 51.8% of 14080052 rows
Read 62.6% of 14080052 rows
Read 77.5% of 14080052 rows
Read 92.3% of 14080052 rows
Read 14080052 rows and 5 (of 5) columns from 0.620 GB file in 00:00:13
## 
Read 0.0% of 2117987 rows
Read 9.0% of 2117987 rows
Read 11.3% of 2117987 rows
Read 13.7% of 2117987 rows
Read 16.5% of 2117987 rows
Read 19.4% of 2117987 rows
Read 23.1% of 2117987 rows
Read 27.9% of 2117987 rows
Read 33.5% of 2117987 rows
Read 38.2% of 2117987 rows
Read 45.3% of 2117987 rows
Read 54.3% of 2117987 rows
Read 66.1% of 2117987 rows
Read 77.4% of 2117987 rows
Read 81.7% of 2117987 rows
Read 93.0% of 2117987 rows
Read 2117987 rows and 43 (of 43) columns from 0.606 GB file in 00:00:34
## Loading required package: sp
wi = DT[State == "WI"]
tmp = Et[unique(wi$NPI)]  
Ewi = tmp[complete.cases(tmp)]  #lots of NA's.  Have not inspected why.
el=as.matrix(Ewi)[,1:2] #igraph needs the edgelist to be in matrix format
g=graph.edgelist(el,directed = F) # this creates a graph.
ids = V(g)$name
cities = wi[ids, mult = "first"]$City
g = set.vertex.attribute(g, name = "city", index=V(g),value =  cities)
wig = g
madgraph = induced.subgraph(graph = g,vids = which(V(g)$city == "MADISON"))
core = graph.coreness(madgraph)
madgraph = induced.subgraph(graph = madgraph,vids = core>1)
plot(madgraph, vertex.label = NA)

Let’s color the nodes in this figure by some other interesting attributes in DT.

colnames(DT)
##  [1] "NPI"                                                         
##  [2] "PAC ID"                                                      
##  [3] "Professional Enrollment ID"                                  
##  [4] "Last Name"                                                   
##  [5] "First Name"                                                  
##  [6] "Middle Name"                                                 
##  [7] "Suffix"                                                      
##  [8] "Gender"                                                      
##  [9] "Credential"                                                  
## [10] "Medical school name"                                         
## [11] "Graduation year"                                             
## [12] "Primary specialty"                                           
## [13] "Secondary specialty 1"                                       
## [14] "Secondary specialty 2"                                       
## [15] "Secondary specialty 3"                                       
## [16] "Secondary specialty 4"                                       
## [17] "All secondary specialties"                                   
## [18] "Organization legal name"                                     
## [19] "Organization DBA name"                                       
## [20] "Group Practice PAC ID"                                       
## [21] "Number of Group Practice members"                            
## [22] "Line 1 Street Address"                                       
## [23] "Line 2 Street Address"                                       
## [24] "Marker of address line 2 suppression"                        
## [25] "City"                                                        
## [26] "State"                                                       
## [27] "Zip Code"                                                    
## [28] "Claims based hospital affiliation CCN 1"                     
## [29] "Claims based hospital affiliation LBN 1"                     
## [30] "Claims based hospital affiliation CCN 2"                     
## [31] "Claims based hospital affiliation LBN 2"                     
## [32] "Claims based hospital affiliation CCN 3"                     
## [33] "Claims based hospital affiliation LBN 3"                     
## [34] "Claims based hospital affiliation CCN 4"                     
## [35] "Claims based hospital affiliation LBN 4"                     
## [36] "Claims based hospital affiliation CCN 5"                     
## [37] "Claims based hospital affiliation LBN 5"                     
## [38] "Professional accepts Medicare Assignment"                    
## [39] "Participating in eRx"                                        
## [40] "Participating in PQRS"                                       
## [41] "Participating in EHR"                                        
## [42] "Received PQRS Maintenance of Certification Program Incentive"
## [43] "Participated in Million Hearts"
features = colnames(DT)[c(8:12, 18,19, 21, 28)]
features
## [1] "Gender"                                 
## [2] "Credential"                             
## [3] "Medical school name"                    
## [4] "Graduation year"                        
## [5] "Primary specialty"                      
## [6] "Organization legal name"                
## [7] "Organization DBA name"                  
## [8] "Number of Group Practice members"       
## [9] "Claims based hospital affiliation CCN 1"
ids = V(madgraph)$name
tmp = wi[ids, mult = "last"]
atbs = tmp[,features, with = F]  # Thank you google for helping to find "with"
mean(complete.cases(atbs))
## [1] 1
atbs = as.matrix(atbs)
for(i in 1:ncol(atbs)){
madgraph = set.vertex.attribute(madgraph, name = colnames(atbs)[i], index=V(madgraph),value =  atbs[,i])
}
summary(madgraph)
## IGRAPH UN-- 474 3706 -- 
## attr: name (v/c), city (v/c), Gender (v/c), Credential (v/c),
##   Medical school name (v/c), Graduation year (v/c), Primary
##   specialty (v/c), Organization legal name (v/c), Organization DBA
##   name (v/c), Number of Group Practice members (v/c), Claims based
##   hospital affiliation CCN 1 (v/c)

Now, let’s plot it with several different colorings. The thing that takes the longest in plotting is computing the node locations (extensive field of algorithmic study!).

locs = layout.fruchterman.reingold(madgraph)
madStuff = cbind(locs, atbs)
save(madStuff, file = "~/dataFiles/physicianReferral/madStuff.RData")