forked from paulgp/applied-methods-phd
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathload_data_ml1.R
More file actions
642 lines (637 loc) · 34.8 KB
/
Copy pathload_data_ml1.R
File metadata and controls
642 lines (637 loc) · 34.8 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
library(readr)
library(tidyverse)
library(lubridate)
library(stringr)
library(furrr)
library(caret)
#
# files <- dir(pattern = "tbFA_QuickSearch_.*.zip")
# Raw MLS archive(s) to process. Currently a single hard-coded file; the
# commented-out dir() call above lists matching archives in the working
# directory instead.
files <- c("tbFA_QuickSearch_SMART_II.zip")
# Directory holding the raw archives. The trailing slash is required because
# the full path below is built with paste0(), which inserts no separator.
data_path <- "/Users/psg24/Dropbox/Papers/GGP_Brokers/MLSRawData/"
# Index into `files` of the archive to process.
i <- 1
# Full path to the selected archive.
fn <- paste0(data_path, files[i])
# print(fn)
# decompression <- system2("unzip",
# args = c("-o", # include override flag
# fn),
# stdout = TRUE)
# # Test for success criteria
# # change the search depending on
# # your implementation
# if (grepl("Warning message", tail(decompression, 1))) {
# print(decompression)
# }
# fn2 <- paste0(strsplit(fn, "\\.")[[1]][1], ".txt")
# df <- read_delim(fn2,
# "|", escape_double = FALSE, trim_ws = TRUE,
# col_types = cols(
# FA_APN = col_character(),
# FA_BasementFinPct =col_double(),
# FA_BathCalcInt =col_integer(),
# FA_BathsTotal =col_double(),
# FA_CloseDate =col_datetime(),
# FA_ContractDate =col_datetime(),
# FA_DOM =col_integer(),
# FA_DOM_DT =col_datetime(),
# FA_EntryDate =col_datetime(),
# FA_GarageStyle = col_character(),
# FA_GatedCommunity = col_character(),
# FA_IsCurrentListing =col_logical(),
# FA_LandUse = col_character(),
# FA_ListDate =col_datetime(),
# FA_ListID = col_character(),
# FA_ListStatus = col_character(),
# FA_ListStatusCategoryCode = col_character(),
# FA_OffMarketDate =col_datetime(),
# FA_OriginalListDate =col_datetime(),
# FA_Ownership = col_character(),
# FA_Pool = col_character(),
# FA_PostDate =col_datetime(),
# FA_PricePerSqFt =col_double(),
# FA_PrimaryCity = col_character(),
# FA_PropertySubType = col_character(),
# FA_PropertyType = col_character(),
# FA_Rent_Sale_ind = col_character(),
# FA_SquareFeet =col_integer(),
# FA_StatusChangeDate =col_datetime(),
# FA_StoriesCode = col_character(),
# FA_StoriesNbr =col_double(),
# FA_Style = col_character(),
# FA_YearBuilt =col_double(),
# FA_YearBuiltEffective =col_double(),
# FA_YearBuiltNewConstruction =col_double(),
# ListingServiceName = col_character(),
# ListingServiceNameAbbrev = col_character(),
# ListingServiceNameCode = col_character(),
# PropertyTypeID =col_integer(),
# PropertyTypeStandardName = col_character(),
# SysPropertyID = col_character(),
# AboveGradeFinishedArea =col_double(),
# AdditionalArea =col_double(),
# AddressBoxNumber = col_character(),
# AddressCarrierRoute = col_character(),
# AddressCity = col_character(),
# AddressCountry = col_character(),
# AddressCountyOrParish = col_character(),
# AddressPostalCode = col_character(),
# AddressPostalCodePlus4 = col_character(),
# AddressStateOrProvince = col_character(),
# AddressStreetAdditionalInfo = col_character(),
# AddressStreetAddress = col_character(),
# AddressStreetDirPrefix = col_character(),
# AddressStreetDirSuffix = col_character(),
# AddressStreetName = col_character(),
# AddressStreetNumber = col_character(),
# AddressStreetSuffix = col_character(),
# AddressUnitNumber = col_character(),
# Age =col_double(),
# Amenities1 = col_character(),
# Amenities2 = col_character(),
# AnimalPolicyPermitted =col_logical(),
# AnimalPolicyPermittedTypes = col_character(),
# Appliances = col_character(),
# ArchitecturalStyle = col_character(),
# AssociationAmenities = col_character(),
# AssociationFee =col_double(),
# AssociationFee2 =col_double(),
# AssociationFee2Description = col_character(),
# AssociationFeeIncludes = col_character(),
# AssociationFeePeriod = col_character(),
# AssociationFeePeriod2 = col_character(),
# AssociationMgmtCo = col_character(),
# AssociationMgmtCoAddr = col_character(),
# AssociationMgmtCoCity = col_character(),
# AssociationMgmtCoPhone = col_character(),
# AssociationMgmtCoPostalCode = col_character(),
# AssociationMgmtCoState = col_character(),
# AssociationName = col_character(),
# AssociationYN = col_character(),
# AttachedYN = col_character(),
# AtticType = col_character(),
# basementFinishPct = col_character(),
# Baths1Qtr =col_double(),
# Baths3Qtr =col_double(),
# BathsFull =col_double(),
# BathsHalf =col_double(),
# BathsPartial =col_double(),
# BathsTotal = col_character(),
# Beds =col_double(),
# Board = col_character(),
# BoardSubBoard = col_character(),
# BoatDockDescription = col_character(),
# BoatDockYN = col_character(),
# BrokerName = col_character(),
# BrokerPhone = col_character(),
# BuilderModel = col_character(),
# BuilderName = col_character(),
# BuilderTractCode = col_character(),
# BuildingAreaRange = col_character(),
# BuildingAreaSource = col_character(),
# BuildingAreaTotal =col_double(),
# BuyerAgentCellPhone = col_character(),
# BuyerAgentEmail = col_character(),
# BuyerAgentFax = col_character(),
# BuyerAgentID = col_character(),
# BuyerAgentName = col_character(),
# BuyerAgentPhonePrimary = col_character(),
# BuyerAgentURL = col_character(),
# BuyerBrokerID = col_character(),
# BuyerCoAgentCellPhone = col_character(),
# BuyerCoAgentEmail = col_character(),
# BuyerCoAgentFax = col_character(),
# BuyerCoAgentID = col_character(),
# BuyerCoAgentName = col_character(),
# BuyerCoAgentPhonePrimary = col_character(),
# BuyerOfficeCity = col_character(),
# BuyerOfficeCountry = col_character(),
# BuyerOfficeEmail = col_character(),
# BuyerOfficeFax = col_character(),
# BuyerOfficeID = col_character(),
# BuyerOfficeName = col_character(),
# BuyerOfficeOfficePhone = col_character(),
# BuyerOfficePostalCode = col_character(),
# BuyerOfficeState = col_character(),
# BuyerOfficeStreetAddress = col_character(),
# BuyerTeamDisplayName = col_character(),
# cancellationDate =col_datetime(),
# CAPRate = col_character(),
# CarportYN = col_character(),
# CensusTract = col_character(),
# CloseDate =col_datetime(),
# ClosePrice =col_double(),
# ClosePriceHigh =col_double(),
# ClosePriceLow =col_double(),
# CoBuyerAgentURL = col_character(),
# CoBuyerOfficeAOR = col_character(),
# CoBuyerOfficeEmail = col_character(),
# CoBuyerOfficeFax = col_character(),
# CoBuyerOfficeKey = col_character(),
# CoBuyerOfficeMlsId = col_character(),
# CoBuyerOfficeName = col_character(),
# CoBuyerOfficePhone = col_character(),
# CoBuyerOfficePhoneExt = col_character(),
# CoBuyerOfficeURL = col_character(),
# CoListOfficeAOR = col_character(),
# CoListOfficeEmail = col_character(),
# CoListOfficeFax = col_character(),
# CoListOfficeKey = col_character(),
# CoListOfficeMlsId = col_character(),
# CoListOfficeName = col_character(),
# CoListOfficePhone = col_character(),
# CoListOfficePhoneExt = col_character(),
# CoListOfficeURL = col_character(),
# Commission = col_character(),
# CommissionValueType = col_character(),
# CommunityFeatures = col_character(),
# Condition = col_character(),
# ConstructionMaterials = col_character(),
# Contingency = col_character(),
# ContingentDate =col_datetime(),
# Cooling = col_character(),
# CoolingYN = col_character(),
# CropsIncludedYN = col_character(),
# CrossStreet = col_character(),
# DevelopmentStatus = col_character(),
# DirectionFaces = col_character(),
# Directions = col_character(),
# DisabilityFeatures = col_character(),
# DisabilityFeaturesYN = col_character(),
# Disclaimer = col_character(),
# Disclosures = col_character(),
# DistanceFromSchoolBus = col_character(),
# DistanceFromShopping = col_character(),
# DistanceToElectric = col_character(),
# DOM =col_double(),
# DOMCumulative =col_double(),
# DualVariableCompensationYN = col_character(),
# EnergyInformation = col_character(),
# Exclusions = col_character(),
# ExpirationDate =col_datetime(),
# Exterior = col_character(),
# ExteriorFeatures = col_character(),
# Fencing = col_character(),
# Financing = col_character(),
# FireplaceFeatures = col_character(),
# FireplaceFuel = col_character(),
# FireplacesTotal =col_double(),
# FireplacesYN = col_character(),
# FirstMtgAmt =col_double(),
# FloorLocation = col_character(),
# Floors = col_character(),
# ForeclosureYN = col_character(),
# FoundationDetails = col_character(),
# Garage = col_character(),
# GarageArea =col_double(),
# GarageAttachedYN = col_character(),
# GarageSpaces =col_double(),
# GarageStyle = col_character(),
# GarageYN = col_character(),
# GreenBuildingCertification = col_character(),
# GreenCertificationRating = col_character(),
# GreenCertifyingBody = col_character(),
# GreenEnergyEfficient = col_character(),
# GreenEnergyGeneration = col_character(),
# GreenIndoorAirQuality = col_character(),
# GreenLocation = col_character(),
# GreenSustainability = col_character(),
# GreenWaterConservation = col_character(),
# GreenYearCertified = col_character(),
# GrossScheduledIncome =col_double(),
# Heating = col_character(),
# HeatingFuel = col_character(),
# HeatingYN = col_character(),
# HorseFacilities = col_character(),
# HorseFacilitiesYN = col_character(),
# Inclusions = col_character(),
# IncomeExpenses =col_double(),
# IncomeExpensesPeriod = col_character(),
# IncomeGrossIncome =col_double(),
# IncomeGrossIncomePeriod = col_character(),
# IncomeNetIncome =col_double(),
# IncomeNetIncomePeriod = col_character(),
# IncomeRentIncome =col_double(),
# IncomeRentIncomePeriod = col_character(),
# IncomeVacancyFactor =col_double(),
# IncomeVacancyFactorValueType = col_character(),
# InteriorFeatures = col_character(),
# IrrigationSource = col_character(),
# IrrigationWaterRightsAcres = col_character(),
# IrrigationWaterRightsYN = col_character(),
# LeaseConsideredYN = col_character(),
# LeaseDate =col_datetime(),
# ListAgentCellPhone = col_character(),
# ListAgentEmail = col_character(),
# ListAgentFax = col_character(),
# ListAgentFullName = col_character(),
# ListAgentID = col_character(),
# ListAgentPhonePrimary = col_character(),
# ListAgentURL = col_character(),
# ListCoAgentCellPhone = col_character(),
# ListCoAgentEmail = col_character(),
# ListCoAgentFax = col_character(),
# ListCoAgentFullName = col_character(),
# ListCoAgentID = col_character(),
# ListCoAgentPhonePrimary = col_character(),
# ListCoAgentURL = col_character(),
# ListDate =col_datetime(),
# ListingID = col_character(),
# ListingService = col_character(),
# ListingStatus = col_character(),
# ListingStatusCode = col_character(),
# ListingStatusStatusClass = col_character(),
# ListingSubStatus = col_character(),
# ListingTerms = col_character(),
# ListingType = col_character(),
# ListOfficeCity = col_character(),
# ListOfficeCountry = col_character(),
# ListOfficeEmail = col_character(),
# ListOfficeFax = col_character(),
# ListOfficeID = col_character(),
# ListOfficeName = col_character(),
# ListOfficePhone = col_character(),
# ListOfficePostalCode = col_character(),
# ListOfficeState = col_character(),
# ListOfficeStreetAddress = col_character(),
# ListOfficeURL = col_character(),
# ListPrice =col_double(),
# ListPriceHighAmount =col_double(),
# ListPriceLowAmount =col_double(),
# ListPricePrevious = col_character(),
# ListTeamDisplayName = col_character(),
# LivingArea =col_double(),
# LivingArea_Range = col_character(),
# LockBoxLocation = col_character(),
# LockBoxSerialNumber = col_character(),
# LockBoxType = col_character(),
# LotDescription = col_character(),
# LotSizeAreaAcres =col_double(),
# LotSizeAreaSqFeet =col_double(),
# LotSizeDimensions = col_character(),
# LotSizeLength =col_double(),
# LotSizeSource = col_character(),
# LotSizeWidth =col_double(),
# MapCoordinate = col_character(),
# MLSAreaMajorCode = col_character(),
# MLSAreaMajorDescription = col_character(),
# MLSAreaMinor = col_character(),
# ModificationTimestamp =col_datetime(),
# NewConstructionYN = col_character(),
# NumberOfSeparateElectricMeters =col_double(),
# NumberOfSeparateGasMeters =col_double(),
# NumberOfSeparateWaterMeters =col_double(),
# NumberOfTotalUnits =col_double(),
# NumberOfUnitsBuildings =col_double(),
# NumberOfUnitsLeased =col_double(),
# OccupantCellPhone = col_character(),
# OccupantEmail = col_character(),
# OccupantFax = col_character(),
# OccupantHomePhone = col_character(),
# OccupantName = col_character(),
# OccupantOfficePhone = col_character(),
# OccupantPager = col_character(),
# OccupantType = col_character(),
# OffMarketDate =col_datetime(),
# OpenHouse = col_character(),
# OpenHouseDescription = col_character(),
# OpenParkingYN = col_character(),
# OriginalListDate =col_datetime(),
# OriginalListPrice =col_double(),
# OtherImprovements = col_character(),
# OtherParking = col_character(),
# Owner_SellerName = col_character(),
# OwnerPays = col_character(),
# OwnerPhone = col_character(),
# Ownership = col_character(),
# OwnershipType = col_character(),
# ParcelAccess = col_character(),
# ParcelNumber = col_character(),
# ParcelTaxID = col_character(),
# ParkingCarport =col_double(),
# ParkingCoveredParking =col_double(),
# ParkingFeatures = col_character(),
# ParkingOpenParking =col_double(),
# ParkingTotal =col_double(),
# ParkName = col_character(),
# PendingDate =col_datetime(),
# PendingPrice =col_double(),
# PlumbingCode = col_character(),
# PoolFeatures = col_character(),
# PoolYN = col_character(),
# PorchType = col_character(),
# possession = col_character(),
# PresentUse = col_character(),
# PricePerSqFt = col_character(),
# ProjectName = col_character(),
# PropertySubTypeDescription = col_character(),
# PropertyTax =col_double(),
# PropertyType = col_character(),
# PropertyTypeDescription = col_character(),
# PublicRemarks = col_character(),
# PurchaseContractDate =col_datetime(),
# Quality = col_character(),
# Range = col_character(),
# RentalCoAddress = col_character(),
# RentalCoCity = col_character(),
# RentalCompany = col_character(),
# RentalCoPhone = col_character(),
# RentalCoState = col_character(),
# RentalCoZipCode = col_character(),
# RentControlYN = col_character(),
# RentIncludes = col_character(),
# RentPrice =col_double(),
# RentSaleLease = col_character(),
# REO = col_character(),
# REOYN = col_character(),
# ReserveListPrice =col_double(),
# Road = col_character(),
# Roof = col_character(),
# RoomsBasementAmenities = col_character(),
# RoomsBasementArea = col_character(),
# RoomsBasementDescription = col_character(),
# RoomsBasementDimensions = col_character(),
# RoomsBasementFeatures = col_character(),
# RoomsBasementFinPct =col_double(),
# RoomsBasementType = col_character(),
# RoomsBasementYN = col_character(),
# RoomsBathroomFourArea = col_character(),
# RoomsBathroomFourDescription = col_character(),
# RoomsBathroomFourDimensions = col_character(),
# RoomsBathroomFourFeatures = col_character(),
# RoomsBathroomFourLevel = col_character(),
# RoomsBathroomFourYN = col_character(),
# RoomsBathroomOneArea = col_character(),
# RoomsBathroomOneDescription = col_character(),
# RoomsBathroomOneDimensions = col_character(),
# RoomsBathroomOneFeatures = col_character(),
# RoomsBathroomOneLevel = col_character(),
# RoomsBathroomOneYN = col_character(),
# RoomsBathroomThreeArea = col_character(),
# RoomsBathroomThreeDescription = col_character(),
# RoomsBathroomThreeDimensions = col_character(),
# RoomsBathroomThreeFeatures = col_character(),
# RoomsBathroomThreeLevel = col_character(),
# RoomsBathroomThreeYN = col_character(),
# RoomsBathroomTwoArea = col_character(),
# RoomsBathroomTwoDescription = col_character(),
# RoomsBathroomTwoDimensions = col_character(),
# RoomsBathroomTwoFeatures = col_character(),
# RoomsBathroomTwoLevel = col_character(),
# RoomsBathroomTwoYN = col_character(),
# RoomsBedroomFiveArea = col_character(),
# RoomsBedroomFiveDescription = col_character(),
# RoomsBedroomFiveDimensions = col_character(),
# RoomsBedroomFiveFeatures = col_character(),
# RoomsBedroomFiveLevel = col_character(),
# RoomsBedroomFiveYN = col_character(),
# RoomsBedroomFourArea = col_character(),
# RoomsBedroomFourDescription = col_character(),
# RoomsBedroomFourDimensions = col_character(),
# RoomsBedroomFourFeatures = col_character(),
# RoomsBedroomFourLevel = col_character(),
# RoomsBedroomFourYN = col_character(),
# RoomsBedroomOneArea = col_character(),
# RoomsBedroomOneDescription = col_character(),
# RoomsBedroomOneDimensions = col_character(),
# RoomsBedroomOneFeatures = col_character(),
# RoomsBedroomOneLevel = col_character(),
# RoomsBedroomOneYN = col_character(),
# RoomsBedroomThreeArea = col_character(),
# RoomsBedroomThreeDescription = col_character(),
# RoomsBedroomThreeDimensions = col_character(),
# RoomsBedroomThreeFeatures = col_character(),
# RoomsBedroomThreeLevel = col_character(),
# RoomsBedroomThreeYN = col_character(),
# RoomsBedroomTwoArea = col_character(),
# RoomsBedroomTwoDescription = col_character(),
# RoomsBedroomTwoDimensions = col_character(),
# RoomsBedroomTwoFeatures = col_character(),
# RoomsBedroomTwoLevel = col_character(),
# RoomsBedroomTwoYN = col_character(),
# RoomsBonusRoomArea = col_character(),
# RoomsBonusRoomDescription = col_character(),
# RoomsBonusRoomDimensions = col_character(),
# RoomsBonusRoomFeatures = col_character(),
# RoomsBonusRoomLevel = col_character(),
# RoomsBonusRoomYN = col_character(),
# RoomsDenArea = col_character(),
# RoomsDenDescription = col_character(),
# RoomsDenDimensions = col_character(),
# RoomsDenFeatures = col_character(),
# RoomsDenLevel = col_character(),
# RoomsDenYN = col_character(),
# RoomsDiningDescription = col_character(),
# RoomsDiningRoomArea = col_character(),
# RoomsDiningRoomDimensions = col_character(),
# RoomsDiningRoomFeatures = col_character(),
# RoomsDiningRoomLevel = col_character(),
# RoomsDiningRoomYN = col_character(),
# RoomsExerciseArea = col_character(),
# RoomsExerciseDescription = col_character(),
# RoomsExerciseDimensions = col_character(),
# RoomsExerciseFeatures = col_character(),
# RoomsExerciseLevel = col_character(),
# RoomsExerciseYN = col_character(),
# RoomsFamilyDescription = col_character(),
# RoomsFamilyRoomArea = col_character(),
# RoomsFamilyRoomDimensions = col_character(),
# RoomsFamilyRoomFeatures = col_character(),
# RoomsFamilyRoomLevel = col_character(),
# RoomsFamilyRoomYN = col_character(),
# RoomsGameArea = col_character(),
# RoomsGameDescription = col_character(),
# RoomsGameDimensions = col_character(),
# RoomsGameFeatures = col_character(),
# RoomsGameLevel = col_character(),
# RoomsGameYN = col_character(),
# RoomsGreatRoomArea = col_character(),
# RoomsGreatRoomDescription = col_character(),
# RoomsGreatRoomDimensions = col_character(),
# RoomsGreatRoomFeatures = col_character(),
# RoomsGreatRoomLevel = col_character(),
# RoomsGreatRoomYN = col_character(),
# RoomsKitchenArea = col_character(),
# RoomsKitchenDescription = col_character(),
# RoomsKitchenDimensions = col_character(),
# RoomsKitchenFeatures = col_character(),
# RoomsKitchenLevel = col_character(),
# RoomsKitchenYN = col_character(),
# RoomsLaundryArea = col_character(),
# RoomsLaundryDescription = col_character(),
# RoomsLaundryDimensions = col_character(),
# RoomsLaundryFeatures = col_character(),
# RoomsLaundryLevel = col_character(),
# RoomsLaundryYN = col_character(),
# RoomsLivingRoomArea = col_character(),
# RoomsLivingRoomDescription = col_character(),
# RoomsLivingRoomDimensions = col_character(),
# RoomsLivingRoomFeatures = col_character(),
# RoomsLivingRoomLevel = col_character(),
# RoomsLivingRoomYN = col_character(),
# RoomsMasterBathroomArea = col_character(),
# RoomsMasterBathroomDescription = col_character(),
# RoomsMasterBathroomDimensions = col_character(),
# RoomsMasterBathroomFeatures = col_character(),
# RoomsMasterBathroomLevel = col_character(),
# RoomsMasterBathroomYN = col_character(),
# RoomsMasterBedroomArea = col_character(),
# RoomsMasterBedroomDescription = col_character(),
# RoomsMasterBedroomDimensions = col_character(),
# RoomsMasterBedroomFeatures = col_character(),
# RoomsMasterBedroomLevel = col_character(),
# RoomsMasterBedroomYN = col_character(),
# RoomsMediaArea = col_character(),
# RoomsMediaDescription = col_character(),
# RoomsMediaDimensions = col_character(),
# RoomsMediaFeatures = col_character(),
# RoomsMediaLevel = col_character(),
# RoomsMediaYN = col_character(),
# RoomsOfficeArea = col_character(),
# RoomsOfficeDescription = col_character(),
# RoomsOfficeDimensions = col_character(),
# RoomsOfficeFeatures = col_character(),
# RoomsOfficeLevel = col_character(),
# RoomsOfficeYN = col_character(),
# RoomsOtherArea = col_character(),
# RoomsOtherDescription = col_character(),
# RoomsOtherDimensions = col_character(),
# RoomsOtherFeatures = col_character(),
# RoomsOtherLevel = col_character(),
# RoomsOtherYN = col_character(),
# RoomsTotalRooms =col_double(),
# RoomsUtilityArea = col_character(),
# RoomsUtilityDescription = col_character(),
# RoomsUtilityDimensions = col_character(),
# RoomsUtilityFeatures = col_character(),
# RoomsUtilityLevel = col_character(),
# RoomsUtilityYN = col_character(),
# SchoolElementaryDistrict = col_character(),
# SchoolElementarySchool = col_character(),
# SchoolHighDistrict = col_character(),
# SchoolHighSchool = col_character(),
# SchoolJrHigh = col_character(),
# SchoolMiddleOrJuniorDistrict = col_character(),
# SchoolMiddleSchool = col_character(),
# SchoolSchoolDistrict = col_character(),
# Section = col_character(),
# SecurityFeatures = col_character(),
# SecurityFeaturesYN = col_character(),
# SeniorCommunity = col_character(),
# SeniorCommunityYN = col_character(),
# ShortSaleYN = col_character(),
# SignOnPropertyYN = col_character(),
# SoldTerms = col_character(),
# Spa = col_character(),
# SpaYN = col_character(),
# SpecialListingConditions = col_character(),
# Sprinklers = col_character(),
# StatusChangeDate =col_datetime(),
# Stories =col_double(),
# StoriesDescription = col_character(),
# SubAgencyCompensation = col_character(),
# SubAgencyCompensationType = col_character(),
# SubdivisionName = col_character(),
# TaxAssessedValue =col_double(),
# TaxBlock = col_character(),
# TaxBookNumber = col_character(),
# TaxLegalDescription = col_character(),
# TaxLot = col_character(),
# TaxMapNumber = col_character(),
# TenantPays = col_character(),
# TennisCourt = col_character(),
# TennisCourtYN = col_character(),
# Township = col_character(),
# Tract = col_character(),
# TwpSecRng = col_character(),
# Utilities = col_character(),
# UtilitiesSewer = col_character(),
# UtilitiesWater = col_character(),
# UtilitiesYN = col_character(),
# ViewDescription = col_character(),
# ViewYN = col_character(),
# WaterAccess = col_character(),
# WaterAccessYN = col_character(),
# WaterBodyName = col_character(),
# WaterFrontDescription = col_character(),
# WaterFrontYN = col_character(),
# WithdrawnDate =col_datetime(),
# YearBuilt = col_character(),
# YearBuiltEffective = col_character(),
# YearBuiltNewConstruction = col_character(),
# Zoning = col_character(),
# CMAS_CMAS_EXCEPTION_CODES = col_character(),
# CMAS_CMAS_MATCH_CODE = col_character(),
# CMAS_FIPS_CODE = col_character(),
# CMAS_FULLSITEADDRESSUNPARSED1 = col_character(),
# CMAS_PARCEL_ID = col_character(),
# CMAS_PARCEL_SEQ_NBR =col_double(),
# CMAS_PROPERTY_ADDR_ZIP_1 = col_character(),
# CMAS_PROPERTY_CITY_1 = col_character(),
# CMAS_PROPERTY_STATE_1 = col_character(),
# CMAS_STATE_ID = col_character(),
# CMAS_TRACT_NBR = col_character(),
# CMAS_TRCT_SUBDIV_NAME = col_character(),
# CMAS_UNIT_numeric = col_character(),
# CMAS_Zip5 = col_character(),
# CMAS_PROPERTY_USE_CODE = col_character()
# )
# )
#
# df <- df %>% rename_all(tolower)
# Keep only the listing-level fields used by the steps below.
#
# `df` is expected to be the raw MLS extract with lower-cased column names,
# i.e. the result of the read_delim() + rename_all(tolower) steps that are
# commented out above. If those steps have not been run, `df` resolves to the
# function stats::df and select() fails with an opaque dispatch error, so fail
# early with an actionable message instead.
if (!is.data.frame(df)) {
  stop(
    "`df` is not a data frame: run the commented-out read_delim() and ",
    "rename_all(tolower) steps before selecting columns.",
    call. = FALSE
  )
}
# Each column is listed once; select() keeps a column at its first position,
# so the output columns and their order match the original selection.
df2 <- df %>%
  select(
    fa_apn, fa_bathstotal, fa_dom, fa_closedate, fa_garagestyle,
    fa_listdate, fa_listid, fa_liststatus,
    fa_originallistdate, fa_pool, fa_propertytype, fa_rent_sale_ind,
    fa_squarefeet, fa_storiesnbr,
    fa_yearbuilt, propertytypeid, propertytypestandardname,
    syspropertyid, cmas_fips_code, cmas_parcel_id, cmas_parcel_seq_nbr,
    buyeragentid,
    listprice, originallistprice,
    closeprice, garageyn, poolyn, taxassessedvalue, coolingyn, fireplacesyn,
    roomsbasementyn, waterfrontyn, foreclosureyn, shortsaleyn, livingarea,
    beds
  )
# #file.remove(fn2)
#
# Convert the listing and closing date columns to Date values and extract the
# listing/closing year. A single mutate() is enough: later expressions can
# refer to columns created earlier in the same call.
df2 <- mutate(
  df2,
  list_date = date(fa_listdate),
  list_year = year(list_date),
  close_date = date(fa_closedate),
  close_year = year(close_date)
)
#
# Recode a close price of 0 as NA.
df2 <- mutate(df2, closeprice = na_if(closeprice, 0))
#
# write_csv(df2, paste0(strsplit(fn, "\\.")[[1]][1], "_parsed", ".csv"))