// Package csv parses uploaded CSV files describing pools and geocodes them.
package csv
import (
	"context"
	"encoding/csv"
	"fmt"
	"io"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/Gleipnir-Technology/bob"
	"github.com/Gleipnir-Technology/bob/dialect/psql"
	"github.com/Gleipnir-Technology/bob/dialect/psql/um"
	"github.com/Gleipnir-Technology/nidus-sync/config"
	"github.com/Gleipnir-Technology/nidus-sync/db"
	"github.com/Gleipnir-Technology/nidus-sync/db/enums"
	"github.com/Gleipnir-Technology/nidus-sync/db/models"
	"github.com/Gleipnir-Technology/nidus-sync/h3utils"
	"github.com/Gleipnir-Technology/nidus-sync/platform/geom"
	"github.com/Gleipnir-Technology/nidus-sync/platform/text"
	"github.com/Gleipnir-Technology/nidus-sync/stadia"
	"github.com/Gleipnir-Technology/nidus-sync/userfile"
	"github.com/aarondl/opt/omit"
	"github.com/aarondl/opt/omitnull"
	"github.com/rs/zerolog/log"
)
// headerPoolEnum identifies the semantic meaning of a recognized CSV column
// header in a pool upload.
type headerPoolEnum int

const (
	// Typed as headerPoolEnum (previously these were untyped ints despite
	// the named type existing for exactly this purpose).
	headerAddressCity headerPoolEnum = iota
	headerAddressPostalCode
	headerAddressStreet
	headerCondition
	headerNotes
	headerPropertyOwnerName
	headerPropertyOwnerPhone
	headerResidentOwned
	headerResidentPhone
	// headerTag marks any unrecognized column; its cell values are stored
	// as free-form tags keyed by the column name.
	headerTag
)

// String returns the human-readable header name as it appears in
// user-facing error messages (see errorMissingHeader).
func (e headerPoolEnum) String() string {
	switch e {
	case headerAddressCity:
		return "City"
	case headerAddressPostalCode:
		return "Postal Code"
	case headerAddressStreet:
		return "Street Address"
	case headerCondition:
		return "Condition"
	case headerNotes:
		return "Notes"
	case headerPropertyOwnerName:
		return "Property Owner Name"
	case headerPropertyOwnerPhone:
		return "Property Owner Phone"
	case headerResidentOwned:
		return "Resident Owned"
	case headerResidentPhone:
		return "Resident Phone"
	case headerTag:
		// Previously headerTag fell into the default branch.
		return "Tag"
	default:
		return "bad programmer"
	}
}
func ProcessJob(ctx context.Context, file_id int32) error {
file, err := models.FindFileuploadFile(ctx, db.PGInstance.BobDB, file_id)
if err != nil {
return fmt.Errorf("Failed to get file %d from DB: %w", file_id, err)
}
txn, err := db.PGInstance.BobDB.BeginTx(ctx, nil)
if err != nil {
return fmt.Errorf("Failed to start transaction: %w", err)
}
defer txn.Rollback(ctx)
c, err := models.FindFileuploadCSV(ctx, txn, file.ID)
if err != nil {
return fmt.Errorf("Failed to get csv file %d from DB: %w", file.ID, err)
}
pools, err := parseFile(ctx, txn, file, c)
if err != nil {
return fmt.Errorf("parse file: %w", err)
}
_, err = psql.Update(
um.Table("fileupload.csv"),
um.SetCol("rowcount").ToArg(len(pools)),
2026-02-24 20:07:39 +00:00
um.Where(psql.Quote("file_id").EQ(psql.Arg(file_id))),
).Exec(ctx, txn)
if err != nil {
return fmt.Errorf("update csv row: %w", err)
}
org, err := models.FindOrganization(ctx, db.PGInstance.BobDB, file.OrganizationID)
if err != nil {
return fmt.Errorf("get org: %w", err)
}
err = bulkGeocode(ctx, txn, file, c, pools, org)
if err != nil {
log.Error().Err(err).Msg("Failure during geocoding")
}
file.Update(ctx, txn, &models.FileuploadFileSetter{
Status: omit.From(enums.FileuploadFilestatustypeParsed),
})
log.Info().Int32("file.ID", file.ID).Msg("Set file to parsed")
txn.Commit(ctx)
return nil
}
func bulkGeocode(ctx context.Context, txn bob.Tx, file *models.FileuploadFile, c *models.FileuploadCSV, pools []*models.FileuploadPool, org *models.Organization) error {
if len(pools) == 0 {
return nil
}
log.Info().Int("len pools", len(pools)).Msg("bulk geocoding")
client := stadia.NewStadiaMaps(config.StadiaMapsAPIKey)
jobs := make(chan *jobGeocode, len(pools))
errors := make(chan error, len(pools))
var wg sync.WaitGroup
for i := 0; i < 20; i++ {
wg.Add(1)
go worker(ctx, txn, client, jobs, errors, &wg)
}
2026-02-16 17:59:18 +00:00
for i, pool := range pools {
jobs <- &jobGeocode{
csv: c,
rownumber: int32(i),
org: org,
pool: pool,
}
}
close(jobs)
go func() {
wg.Wait()
close(errors)
}()
error_count := 0
for err := range errors {
log.Error().Err(err).Msg("failed to geocode")
error_count++
}
if error_count > 0 {
txn.Rollback(ctx)
return fmt.Errorf("%d errors encountered in bulk geocode", error_count)
}
update_query := `
UPDATE fileupload.pool p
SET is_in_district = (
EXISTS (
SELECT 1
FROM import.district d
JOIN organization o ON d.gid = o.import_district_gid
WHERE o.id = p.organization_id
AND ST_Contains(d.geom_4326, p.geom)
)
)
WHERE p.geom IS NOT NULL;`
_, err := txn.ExecContext(ctx, update_query)
if err != nil {
return fmt.Errorf("failed to update is_in_district: %w", err)
}
return nil
}
// jobGeocode is one unit of work for the geocoding worker pool.
type jobGeocode struct {
	// csv is the upload the pool came from; used when recording errors.
	csv *models.FileuploadCSV
	// rownumber is the zero-based index of the pool within the upload.
	// NOTE(review): parseFile numbers data lines starting at 2 — confirm
	// which numbering error rows recorded from geocode should use.
	rownumber int32
	org       *models.Organization
	pool      *models.FileuploadPool
}
// geocode resolves a single pool's street address to a point via the Stadia
// Maps structured geocoder, then stores the resulting geometry and H3 cell
// (resolution 15) on the pool row.
func geocode(ctx context.Context, txn bob.Tx, client *stadia.StadiaMaps, job *jobGeocode) error {
	pool := job.pool
	// Fix: the previous logger logged the postal code under two keys
	// ("pool.address_postal" and "pool.postal") and never the city.
	sublog := log.With().
		Str("pool.address_street", pool.AddressStreet).
		Str("pool.address_city", pool.AddressCity).
		Str("pool.address_postal", pool.AddressPostalCode).
		Logger()
	req := stadia.StructuredGeocodeRequest{
		Address:    &pool.AddressStreet,
		Locality:   &pool.AddressCity,
		PostalCode: &pool.AddressPostalCode,
	}
	maybeAddServiceArea(&req, job.org)
	resp, err := client.StructuredGeocode(ctx, req)
	if err != nil {
		return fmt.Errorf("client structured geocode failure on %s, %s, %s: %w", pool.AddressStreet, pool.AddressCity, pool.AddressPostalCode, err)
	}
	// Fix: an empty feature list previously caused an index-out-of-range
	// panic at resp.Features[0].
	if len(resp.Features) == 0 {
		return fmt.Errorf("no geocode results for %s, %s, %s", pool.AddressStreet, pool.AddressCity, pool.AddressPostalCode)
	}
	if len(resp.Features) > 1 {
		sublog.Warn().Int("len", len(resp.Features)).Msg("More than one feature")
		if aerr := addError(ctx, txn, job.csv, job.rownumber, 0, "The address provided matched more than one location"); aerr != nil {
			sublog.Error().Err(aerr).Msg("failed to record CSV error")
		}
	}
	// The first feature is the geocoder's best match.
	feature := resp.Features[0]
	if feature.Geometry.Type != "Point" {
		return fmt.Errorf("wrong type %s from %s %s", feature.Geometry.Type, pool.AddressStreet, pool.AddressPostalCode)
	}
	// GeoJSON coordinate order is [longitude, latitude].
	longitude := feature.Geometry.Coordinates[0]
	latitude := feature.Geometry.Coordinates[1]
	cell, err := h3utils.GetCell(longitude, latitude, 15)
	if err != nil {
		// Fix: the original message labeled longitude as "lat" and dropped
		// the underlying error.
		return fmt.Errorf("failed to convert lng %f lat %f to h3 cell: %w", longitude, latitude, err)
	}
	geom_query := geom.PostgisPointQuery(longitude, latitude)
	_, err = psql.Update(
		um.Table("fileupload.pool"),
		um.SetCol("h3cell").ToArg(cell),
		um.SetCol("geom").To(geom_query),
		um.Where(psql.Quote("id").EQ(psql.Arg(pool.ID))),
	).Exec(ctx, txn)
	if err != nil {
		return fmt.Errorf("failed to update pool: %w", err)
	}
	return nil
}
func parseFile(ctx context.Context, txn bob.Tx, file *models.FileuploadFile, c *models.FileuploadCSV) ([]*models.FileuploadPool, error) {
pools := make([]*models.FileuploadPool, 0)
2026-02-08 04:46:54 +00:00
r, err := userfile.NewFileReader(userfile.CollectionCSV, file.FileUUID)
if err != nil {
return pools, fmt.Errorf("Failed to get filereader for %d: %w", file.ID, err)
2026-02-08 04:46:54 +00:00
}
reader := csv.NewReader(r)
h, err := reader.Read()
2026-02-08 04:46:54 +00:00
if err != nil {
return pools, fmt.Errorf("Failed to read header of CSV for file %d: %w", file.ID, err)
2026-02-08 04:46:54 +00:00
}
header_types, header_names := parseHeaders(h)
missing_headers := missingRequiredHeaders(header_types)
for _, mh := range missing_headers {
errorMissingHeader(ctx, txn, c, mh)
file.Update(ctx, txn, &models.FileuploadFileSetter{
Status: omit.From(enums.FileuploadFilestatustypeError),
})
return pools, nil
}
2026-02-16 17:59:18 +00:00
// Start at 2 because the header is line 1, not line 0
line_number := int32(2)
for {
row, err := reader.Read()
if err != nil {
if err == io.EOF {
return pools, nil
}
return pools, fmt.Errorf("Failed to read all CSV records for file %d: %w", file.ID, err)
}
tags := make(map[string]string, 0)
setter := models.FileuploadPoolSetter{
// required fields
//AddressCity: omit.From(),
//AddressPostalCode: omit.From(),
//AddressStreet: omit.From(),
Committed: omit.From(false),
Condition: omit.From(enums.FileuploadPoolconditiontypeUnknown),
Created: omit.From(time.Now()),
CreatorID: omit.From(file.CreatorID),
CSVFile: omit.From(file.ID),
Deleted: omitnull.FromPtr[time.Time](nil),
Geom: omitnull.FromPtr[string](nil),
H3cell: omitnull.FromPtr[string](nil),
// ID - generated
2026-02-14 05:40:27 +00:00
IsInDistrict: omit.From(false),
2026-02-16 16:49:24 +00:00
IsNew: omit.From(true),
2026-02-16 17:59:18 +00:00
LineNumber: omit.From(line_number),
2026-02-14 05:40:27 +00:00
Notes: omit.From(""),
OrganizationID: omit.From(file.OrganizationID),
PropertyOwnerName: omit.From(""),
PropertyOwnerPhoneE164: omitnull.FromPtr[string](nil),
ResidentOwned: omitnull.FromPtr[bool](nil),
ResidentPhoneE164: omitnull.FromPtr[string](nil),
//Tags: convertToPGData(tags),
}
for i, col := range row {
hdr_t := header_types[i]
2026-02-14 15:42:59 +00:00
if col == "" {
continue
}
switch hdr_t {
case headerAddressCity:
setter.AddressCity = omit.From(col)
case headerAddressPostalCode:
setter.AddressPostalCode = omit.From(col)
case headerAddressStreet:
setter.AddressStreet = omit.From(col)
case headerCondition:
var condition enums.FileuploadPoolconditiontype
col_l := strings.ToLower(col)
col_translated := col_l
switch col_l {
case "empty":
col_translated = "dry"
}
err := condition.Scan(col_translated)
if err != nil {
2026-02-16 17:59:18 +00:00
addError(ctx, txn, c, int32(line_number), int32(i), fmt.Sprintf("'%s' is not a pool condition that we recognize. It should be one of %s", col, poolConditionValidValues()))
setter.Condition = omit.From(enums.FileuploadPoolconditiontypeUnknown)
continue
}
setter.Condition = omit.From(condition)
case headerNotes:
setter.Notes = omit.From(col)
case headerPropertyOwnerName:
setter.PropertyOwnerName = omit.From(col)
case headerPropertyOwnerPhone:
phone, err := text.ParsePhoneNumber(col)
if err != nil {
2026-02-16 17:59:18 +00:00
addError(ctx, txn, c, int32(line_number), int32(i), fmt.Sprintf("'%s' is not a phone number that we recognize. Ideally it should be of the form '+12223334444'", col))
continue
}
text.EnsureInDB(ctx, txn, *phone)
2026-02-14 05:40:27 +00:00
setter.PropertyOwnerPhoneE164 = omitnull.From(text.PhoneString(*phone))
case headerResidentOwned:
boolValue, err := parseBool(col)
if err != nil {
2026-02-16 17:59:18 +00:00
addError(ctx, txn, c, int32(line_number), int32(i), fmt.Sprintf("'%s' is not something that we recognize as a true/false value. Please use either 'true' or 'false'", col))
continue
}
setter.ResidentOwned = omitnull.From(boolValue)
case headerResidentPhone:
2026-02-14 05:40:27 +00:00
phone, err := text.ParsePhoneNumber(col)
if err != nil {
2026-02-16 17:59:18 +00:00
addError(ctx, txn, c, int32(line_number), int32(i), fmt.Sprintf("'%s' is not a phone number that we recognize. Ideally it should be of the form '+12223334444'", col))
2026-02-14 05:40:27 +00:00
continue
}
text.EnsureInDB(ctx, txn, *phone)
setter.ResidentPhoneE164 = omitnull.From(text.PhoneString(*phone))
case headerTag:
tags[header_names[i]] = col
}
}
setter.Tags = omit.From(db.ConvertToPGData(tags))
pool, err := models.FileuploadPools.Insert(&setter).One(ctx, txn)
if err != nil {
return pools, fmt.Errorf("Failed to create pool: %w", err)
}
pools = append(pools, pool)
2026-02-16 17:59:18 +00:00
line_number = line_number + 1
2026-02-08 04:46:54 +00:00
}
}
func addError(ctx context.Context, txn bob.Tx, c *models.FileuploadCSV, row_number int32, column_number int32, msg string) error {
r, err := models.FileuploadErrorCSVS.Insert(&models.FileuploadErrorCSVSetter{
Col: omit.From(column_number),
CSVFileID: omit.From(c.FileID),
// ID
Line: omit.From(row_number),
Message: omit.From(msg),
}).One(ctx, txn)
if err != nil {
return fmt.Errorf("Failed to add error: %w", err)
}
2026-02-14 05:40:27 +00:00
log.Info().Int32("id", r.ID).Int32("file_id", c.FileID).Str("msg", msg).Int32("row", row_number).Int32("col", column_number).Msg("Created CSV file error")
return nil
}
// addImportError is a stub: it only logs the failure for now rather than
// persisting an import error record.
func addImportError(file *models.FileuploadFile, err error) {
	log.Debug().
		Err(err).
		Int32("file_id", file.ID).
		Msg("Fake add import error")
}
// parseBool interprets a CSV cell as a boolean. It accepts everything
// strconv.ParseBool does ("true", "1", "f", ...) plus the words "yes" and
// "no", all case-insensitively.
func parseBool(s string) (bool, error) {
	lowered := strings.ToLower(s)
	// Handle the spreadsheet-style words strconv doesn't know about.
	switch lowered {
	case "yes":
		return true, nil
	case "no":
		return false, nil
	}
	value, err := strconv.ParseBool(lowered)
	if err != nil {
		return false, fmt.Errorf("unrecognized '%s'", lowered)
	}
	return value, nil
}
// errorMissingHeader records a file-level error (line 0, column 0) naming a
// required header that was absent from the uploaded CSV.
func errorMissingHeader(ctx context.Context, txn bob.Tx, c *models.FileuploadCSV, h headerPoolEnum) error {
	return addError(ctx, txn, c, 0, 0,
		fmt.Sprintf("The file is missing the '%s' header", h.String()))
}
// maybeAddServiceArea biases geocoding toward the organization's service
// area, when one is recorded, by setting the request focus point to the
// area centroid. Organizations without a centroid are left unbiased.
func maybeAddServiceArea(req *stadia.StructuredGeocodeRequest, org *models.Organization) {
	// An earlier bounding-rectangle restriction is kept here for reference.
	/*
		if org.ServiceAreaXmax.IsNull() ||
			org.ServiceAreaYmax.IsNull() ||
			org.ServiceAreaXmin.IsNull() ||
			org.ServiceAreaYmin.IsNull() {
			return
		}
		xmax := org.ServiceAreaXmax.MustGet()
		ymax := org.ServiceAreaYmax.MustGet()
		xmin := org.ServiceAreaXmin.MustGet()
		ymin := org.ServiceAreaYmin.MustGet()
		req.BoundaryRectMaxLon = &xmax
		req.BoundaryRectMaxLat = &ymax
		req.BoundaryRectMinLon = &xmin
		req.BoundaryRectMinLat = &ymin
	*/
	// No centroid on record: leave the request unbiased.
	if org.ServiceAreaCentroidX.IsNull() || org.ServiceAreaCentroidY.IsNull() {
		return
	}
	centroid_x := org.ServiceAreaCentroidX.MustGet()
	centroid_y := org.ServiceAreaCentroidY.MustGet()
	// Centroid X maps to longitude and Y to latitude.
	req.FocusPointLat = &centroid_y
	req.FocusPointLng = &centroid_x
}
// parseHeaders classifies each column header of the CSV. It returns, in
// column order, the recognized header type of each column and its
// normalized (trimmed, lowercased) name. Unrecognized columns are
// classified as headerTag; their cell values become free-form tags.
func parseHeaders(row []string) ([]headerPoolEnum, []string) {
	result_enums := make([]headerPoolEnum, 0, len(row))
	result_names := make([]string, 0, len(row))
	for _, h := range row {
		hl := strings.ToLower(strings.TrimSpace(h))
		log.Debug().Str("header", hl).Msg("Saw CSV header")
		var type_ headerPoolEnum
		// Fix: Go switch cases do NOT fall through. The previous version
		// used an empty case for each synonym ("zip", "condition",
		// "property owner", "resident phone"), which left type_ at its zero
		// value (headerAddressCity) for those headers. Synonyms now share a
		// multi-value case.
		switch hl {
		case "city":
			type_ = headerAddressCity
		case "zip", "postal code":
			type_ = headerAddressPostalCode
		case "street address":
			type_ = headerAddressStreet
		case "condition", "pool condition":
			type_ = headerCondition
		case "notes":
			type_ = headerNotes
		case "property owner", "property owner name":
			type_ = headerPropertyOwnerName
		case "property owner phone":
			type_ = headerPropertyOwnerPhone
		case "resident owned":
			type_ = headerResidentOwned
		case "resident phone", "resident phone number":
			type_ = headerResidentPhone
		default:
			type_ = headerTag
		}
		result_enums = append(result_enums, type_)
		result_names = append(result_names, hl)
	}
	return result_enums, result_names
}
// missingRequiredHeaders reports which of the mandatory address headers
// (city, postal code, street address) do not appear anywhere in the parsed
// header list, in a fixed order.
func missingRequiredHeaders(headers []headerPoolEnum) []headerPoolEnum {
	seen := make(map[headerPoolEnum]bool, len(headers))
	for _, h := range headers {
		seen[h] = true
	}
	results := make([]headerPoolEnum, 0)
	for _, required := range []headerPoolEnum{headerAddressCity, headerAddressPostalCode, headerAddressStreet} {
		if !seen[required] {
			results = append(results, required)
		}
	}
	return results
}
// poolConditionValidValues renders every accepted pool-condition value as a
// comma-separated list of quoted names for use in user-facing messages,
// e.g. "'dry', 'green', ...".
func poolConditionValidValues() string {
	conditions := enums.AllFileuploadPoolconditiontype()
	quoted := make([]string, 0, len(conditions))
	for _, cond := range conditions {
		quoted = append(quoted, fmt.Sprintf("'%s'", cond))
	}
	return strings.Join(quoted, ", ")
}
// worker consumes geocode jobs until the jobs channel is closed, forwarding
// any failures to the errors channel. Callers must increment wg before
// starting the goroutine; Done is signaled on exit.
//
// NOTE(review): every worker shares the same transaction `txn` — confirm
// the bob.Tx implementation tolerates concurrent use before relying on
// this.
func worker(ctx context.Context, txn bob.Tx, client *stadia.StadiaMaps, jobs <-chan *jobGeocode, errors chan<- error, wg *sync.WaitGroup) {
	defer wg.Done()
	for job := range jobs {
		err := geocode(ctx, txn, client, job)
		if err != nil {
			// errors is buffered to the job count by the caller, so this
			// send does not block.
			errors <- err
		}
	}
}