# David Tate
# Voter targeting and matching.
#
# Result should be: (tab-delimited)
#   Part 1 (voter history compression)
#     $uniq_voter_id \t$election_dt (most-recent)\t$num_times_voted
#   Part 2 (matching with voter file information)
#     (above plus large amount of extra information)
#     (these records should be tab-delimited and should conform to the
#     specs in NCVoter_readme)
#
# Keep tab-delimited format and other constraints in place when
# creating the new database. See NCVoter_readme.txt for a
# full description of the full format.
#
# Usage: perl <script> county_id [result_file]
#   county_id   - selects ncvhistory/<county_id>vht.dat and
#                 ncvoter/<county_id>voter.dat (relative to the cwd)
#   result_file - output path; defaults to results<county_id>.txt

use strict;
use warnings;

use Time::Local;

# Define which records are which so the rest can be generic code.
# Maps each voter-file field name to the zero-based column it is read from.
my %record_format = (
    county_id            => 0,
    county_desc          => 1,
    voter_reg_num        => 2,
    status_cd            => 3,
    last_name            => 9,
    first_name           => 10,
    midl_name            => 11,
    name_sufx_cd         => 12,
    house_num            => 15,
    half_code            => 16,
    street_dir           => 17,
    street_name          => 18,
    street_type_cd       => 19,
    street_sufx_cd       => 20,
    unit_designator      => 21,
    unit_num             => 22,
    res_city_desc        => 23,
    state_cd             => 24,
    zip_code             => 25,
    mail_addr1           => 28,
    mail_addr2           => 29,
    mail_addr3           => 30,
    mail_addr4           => 31,
    mail_city            => 32,
    mail_state           => 33,
    mail_zipcode         => 34,
    area_cd              => 36,
    phone_num            => 37,
    full_phone_number    => 38,
    race_code            => 40,
    party_cd             => 42,
    sex_code             => 44,
    birth_dt             => 46,
    birth_place          => 47,
    registr_dt           => 48,
    precinct_abbrv       => 49,
    municipality_abbrv   => 51,
    ward_abbrv           => 53,
    cong_dist_abbrv      => 55,
    super_court_abbrv    => 57,
    judic_court_abbrv    => 59,
    nc_senate_abbrv      => 61,
    nc_house_abbrv       => 63,
    county_commiss_abbrv => 65,
    township_abbrv       => 67,
    school_dist_abbrv    => 69,
    fire_dist_abbrv      => 71,
    water_dist_abbrv     => 73,
    sewer_dist_abbrv     => 75,
    sanit_dist_abbrv     => 77,
    rescue_dist_abbrv    => 79,
    munic_dist_abbrv     => 81,
    dist_1_abbrv         => 83,
    dist_2_abbrv         => 85,
);

# Inverse lookup: column position => field name.
my %voter_format = reverse %record_format;

# History summary per voter, keyed by unique voter id
# (county id concatenated with voter registration number). Each value is a
# hash ref with 'voted_count', 'most_recent_vote' and 'most_recent_epoch'.
my %county_voters;

# Run only when executed as a script, so the subs can be loaded on their own.
main(@ARGV) unless caller;

# Entry point: condense the county's vote history, write the merged
# database, and report the CPU time used.
#   $input_county_id - county id used to build both input file names
#   $result_file     - optional output path
sub main {
    my ($input_county_id, $result_file) = @_;

    die "Usage: $0 county_id [result_file]\n"
        unless defined $input_county_id && length $input_county_id;
    $result_file ||= "results" . $input_county_id . ".txt";

    my $start = (times)[0];

    condense_hist($input_county_id);
    write_new_db($input_county_id, $result_file);

    my $end = (times)[0];
    print "Took: ", $end - $start, "\n";
    return;
}

# Read ncvhistory/<county>vht.dat and fold it into %county_voters: one entry
# per voter holding the number of history rows seen and the latest election
# date. Columns used: 0 (county id), 2 (voter registration number) and
# 4 (election date). Dies if the file cannot be opened.
sub condense_hist {
    my $county = shift;

    my $history_file = "ncvhistory/" . $county . "vht.dat";
    open my $hist_fh, '<', $history_file
        or die "Can't open file: $history_file, county: $county: $!";

    while (my $line = <$hist_fh>) {
        $line =~ s/\r?\n\z//;
        my @record = split /\t/, $line;

        # A row without a registration number cannot be matched to a voter.
        next if @record < 3;

        my ($county_id, $voter_reg_num, $election_dt) = @record[0, 2, 4];

        # Create unique id
        my $uniq_voter_id = $county_id . $voter_reg_num;

        my $voter = $county_voters{$uniq_voter_id} ||= {};
        $voter->{'voted_count'}++;

        # Update most recent vote if needed. The epoch of the current best
        # date is cached so it is not re-parsed for every history row.
        my $epoch = to_epoch($election_dt);
        if ($epoch > ($voter->{'most_recent_epoch'} || 0)) {
            $voter->{'most_recent_vote'}  = $election_dt;
            $voter->{'most_recent_epoch'} = $epoch;
        }
    }

    close $hist_fh;
    return;
}

# Read ncvoter/<county>voter.dat and write one tab-delimited line per voter
# to $result_file:
#   unique id, the %record_format columns in ascending column order,
#   one empty column, most recent election date, number of times voted.
# The empty column reproduces the layout the original string building
# produced. Voters with no history get empty strings for the last two
# columns. Dies if either file cannot be opened or the result cannot be
# flushed.
sub write_new_db {
    my $county      = shift;
    my $result_file = shift;

    open my $result_fh, '>', $result_file
        or die "Can't open $result_file: $!";

    # Create voter file name from county_id
    my $voter_file = "ncvoter/" . $county . "voter.dat";
    open my $voter_fh, '<', $voter_file
        or die "Can't open $voter_file, in county: $county : $!";

    # Column positions must be sorted numerically; a plain sort would put
    # 10 before 2.
    my @field_indexes = sort { $a <=> $b } keys %voter_format;

    while (my $line = <$voter_fh>) {
        $line =~ s/\r?\n\z//;
        my @record = split /\t/, $line, -1;

        # A row without a registration number has no usable id.
        next if @record < 3;

        my $uniq_voter_id = $record[ $record_format{'county_id'} ]
                          . $record[ $record_format{'voter_reg_num'} ];

        # Short rows yield empty strings rather than undef warnings.
        my @fields = map { defined $record[$_] ? $record[$_] : '' }
                     @field_indexes;

        # Use exists so voters without history do not create new entries.
        my ($most_recent_vote, $voted_count) = ('', '');
        if (exists $county_voters{$uniq_voter_id}) {
            my $history = $county_voters{$uniq_voter_id};
            $most_recent_vote = $history->{'most_recent_vote'}
                if defined $history->{'most_recent_vote'};
            $voted_count = $history->{'voted_count'}
                if defined $history->{'voted_count'};
        }

        # Output the new database one updated record at a time ...
        print {$result_fh} join("\t",
            $uniq_voter_id, @fields, '', $most_recent_vote, $voted_count),
            "\n";
    }

    close $voter_fh;
    close $result_fh or die "Can't close $result_file: $!";
    return;
}

# Convert date to epoch for easy comparisons.
# Accepted formats (anything after the date part is ignored):
#   1994-11-08 00:00:00.000
#   11/08/1994
# Returns local-time epoch seconds for midnight of that date, or 0 when the
# date is undefined, empty, or in neither format. timelocal still dies on
# out-of-range values such as month 13.
sub to_epoch {
    my $date = shift;

    return 0 unless defined $date && length $date;

    my ($year, $month, $day);
    if ($date =~ m{\A\s*(\d{4})-(\d{1,2})-(\d{1,2})}) {
        ($year, $month, $day) = ($1, $2, $3);
    }
    elsif ($date =~ m{\A\s*(\d{1,2})/(\d{1,2})/(\d{4})}) {
        ($month, $day, $year) = ($1, $2, $3);
    }
    else {
        return 0;
    }

    # timelocal($sec, $min, $hours, $mday, $mon, $year)
    # Pass the four-digit year: year - 1900 can land in 0..99, which
    # Time::Local treats as a two-digit year and may map to another century.
    return timelocal(0, 0, 0, $day, $month - 1, $year);
}