diff --git a/server/package.yaml b/server/package.yaml index 1adc9c9..931fdd3 100644 --- a/server/package.yaml +++ b/server/package.yaml @@ -20,27 +20,18 @@ description: Please see the README on Github at = 4.7 && < 5 -# use text for string-like everything - text -# for efficient representation in some cases - bytestring -# for our monad stack - mtl -# for hashmap (using it in results) - hashable - unordered-containers -# for database access - postgresql-simple -# for making http requests to external APIs - wreq -- http-client # for using the types -# for parsing and building JSON request/responses +- http-client # required only for the types - aeson - aeson-casing - lens -# well who doesn't have to deal with date/time - time -# for our http webserver - servant - servant-server - servant-docs @@ -50,7 +41,6 @@ dependencies: - wai-extra - wai-cors - warp -# async concurrency - async - file-embed - say @@ -110,8 +100,6 @@ executables: - -threaded - -rtsopts - -with-rtsopts=-N - - -O2 - - -Wall dependencies: - birdism diff --git a/server/res/schema.sql b/server/res/schema.sql index 53cf6e4..2c9caec 100644 --- a/server/res/schema.sql +++ b/server/res/schema.sql @@ -13,7 +13,8 @@ CREATE TABLE IF NOT EXISTS taxonomy ( family_scientific_name TEXT, report_as TEXT, extinct BOOLEAN, - extinct_year INTEGER + extinct_year INTEGER, + family_code TEXT ); CREATE TABLE IF NOT EXISTS region ( diff --git a/server/src/Worker/PopulateRegion.hs b/server/src/Worker/PopulateRegion.hs index 3a98163..c01a408 100644 --- a/server/src/Worker/PopulateRegion.hs +++ b/server/src/Worker/PopulateRegion.hs @@ -3,6 +3,7 @@ module Worker.PopulateRegion where import Control.Lens import qualified Control.Retry as Retry +import qualified Data.Text as T import qualified Database.PostgreSQL.Simple as PG import qualified Say @@ -24,9 +25,11 @@ populateRegion config = do -- Step 3: get the countries first countries <- runWithRetry getCountries _axEbirdConf + sayDebug $ "Retrieved all countries. " <> tshow (length countries) <> " countries..." -- Step 4.1: insert the countries into db insertRegions (_dbConnection _axDbConn) countries + sayDebug "countries written to DB" -- ~~Step 4.1: concurrently, get subnationals-1 of each country~~ -- Step 5: get subnationals1 syncly @@ -34,7 +37,15 @@ populateRegion config = do sayDebug $ "Total subnationals-1 is: " <> tshow (length subnats1) - forM_ subnats1 $ \subnat1 -> do + sayDebug $ "Filtering subnats-1 which we know don't have subnats-2: " <> tshow (length noSubNats2) + let filteredSubNats1 = filter checkPrefix subnats1 + checkPrefix sns = case sns of + [] -> False + (sn:_) -> any (\countryCode -> not $ countryCode `T.isPrefixOf` uRegionCode (_rCode sn)) noSubNats2 + + sayDebug $ "Filtered subnationals-1 is: " <> tshow (length filteredSubNats1) + + forM_ filteredSubNats1 $ \subnat1 -> do sayDebug $ "running one subnational1: " <> tshow subnat1 subnats2 <- getSubnats2 ctx subnat1 sayDebug $ "got " <> tshow (length subnats2) <> " subnational-2 regions" @@ -63,7 +74,7 @@ populateRegion config = do getSubnats2 ctx subnats1 = do forM subnats1 $ \subnat1 -> do - sayDebug $ "getting subnational-2 region of subnational-1: " <> (uRegionCode $ _rCode subnat1) + sayDebug $ "getting subnational-2 region of subnational-1: " <> uRegionCode (_rCode subnat1) subnats2 <- runWithRetry (getSubnationa2Regions subnat1) ctx insertRegions (_dbConnection $ _axDbConn ctx) subnats2 sleep 3 @@ -112,3 +123,6 @@ run trans cfg = do sayDebug :: (MonadIO m) => Text -> m () sayDebug = Say.say . ("[DEBUG] " <>) + +noSubNats2 :: [Text] +noSubNats2 = ["BF","BG","BR","BW","BO","BT","BJ","BZ","BE","BY","BB","BD","BH","BS","AZ","AT","AM","AG","AO","AD","DZ","AL","AF","BI","KH","CM","CV","BQ","KY","CF","TD","CN","CO","KM","CG","CR","CI","HR","CU","CY","CZ","DK","DJ","DM","DO","CD","EC","EG","SV","ER","EE","SZ","ET","FJ","FI","PF","GA","GM","GE","GH","GR","GD","GT","GN","GW","GY","HT","HN","HU","IS","IR","IQ","IL","IT","JM","JP","JO","KZ","KE","KI","KW","KG","LA","LV","LB","LS","LR","LY","LI","LT","LU","MG","MW","MY","MV","ML","MT","MH","MR","MU","FM","MD","MN","ME","MA","MZ","MM","NA","NR","NL","NI","NE","NG","MP","KP","MK","NO","OM","PK","PW","PA","PG","PY","PE","PH","PL","PR","QA","RO","RU","RW","SH","KN","LC","VC","WS","SM","ST","SA","SN","RS","SC","SL","SK","SI","SB","SO","ZA","KR","SS","SD","SR","SE","CH","SY","TW","TJ","TZ","TH","TL","TG","TO","TT","TN","TR","TM","TV","UG","UA","AE","UM","UY","UZ","VU","VE","VN","VI","YE","ZM","ZW"] diff --git a/server/src/Worker/PopulateTaxonomy.hs b/server/src/Worker/PopulateTaxonomy.hs index 960aa61..0a23748 100644 --- a/server/src/Worker/PopulateTaxonomy.hs +++ b/server/src/Worker/PopulateTaxonomy.hs @@ -3,6 +3,7 @@ module Worker.PopulateTaxonomy where import Control.Lens import qualified Data.ByteString.Lazy as BL +import qualified Data.ByteString.Lazy.Char8 as BLC import qualified Data.Text.Encoding as T import qualified Database.PostgreSQL.Simple as PG import qualified Database.PostgreSQL.Simple.Copy as PG @@ -12,6 +13,7 @@ import Birdism.Common import Birdism.Config import Birdism.Init import HTTP +import Data.Time (getCurrentTime) taxonomyUrl :: String taxonomyUrl = "https://api.ebird.org/v2/ref/taxonomy/ebird" @@ -24,13 +26,15 @@ populateTaxonomy config = do case resp of Left e -> putStrLn ("FATAL ERROR: could not receive taxonomy from ebird: " <> show e) >> Sys.exitFailure Right res -> do + currTime <- getCurrentTime + BLC.writeFile ("ebird-taxonomy-" <> show currTime <> ".csv") res PG.copy_ (ctx ^. dbConnection) copyQ PG.putCopyData (ctx ^. dbConnection) $ BL.toStrict res reply <- PG.putCopyEnd (ctx ^. dbConnection) print reply copyQ :: PG.Query -copyQ = "COPY taxonomy(scientific_name, common_name, species_code, category, taxonomy_order, common_name_code, scientific_name_code, banding_codes, order_name, family_common_name, family_scientific_name, report_as, extinct, extinct_year) FROM STDIN DELIMITER ',' CSV HEADER;" +copyQ = "COPY taxonomy(scientific_name, common_name, species_code, category, taxonomy_order, common_name_code, scientific_name_code, banding_codes, order_name, family_common_name, family_scientific_name, report_as, extinct, extinct_year, family_code) FROM STDIN DELIMITER ',' CSV HEADER;" truncateTaxonomyTable :: PG.Connection -> IO () truncateTaxonomyTable conn = do diff --git a/server/stack.yaml b/server/stack.yaml index 1dcb8ce..6865bb4 100644 --- a/server/stack.yaml +++ b/server/stack.yaml @@ -15,7 +15,7 @@ # resolver: # name: custom-snapshot # location: "./custom-snapshot.yaml" -resolver: lts-18.18 +resolver: lts-20.11 # User packages to be built. # Various formats can be used as shown in the example below. @@ -40,9 +40,7 @@ packages: # Dependency packages to be pulled from upstream that are not in the resolver # (e.g., acme-missiles-0.3) # extra-deps: [] -extra-deps: -- Spock-core-0.13.0.0 -- reroute-0.5.0.0 +extra-deps: [] # Override default flag values for local packages and extra-deps # flags: {} diff --git a/server/stack.yaml.lock b/server/stack.yaml.lock index f2363fc..90e049c 100644 --- a/server/stack.yaml.lock +++ b/server/stack.yaml.lock @@ -3,24 +3,10 @@ # For more information, please see the documentation at: # https://docs.haskellstack.org/en/stable/lock_files -packages: -- completed: - hackage: Spock-core-0.13.0.0@sha256:06e007f23c47bdda52d2927da54160d73f1b6f51a977f3ca9087275698db8f0a,3400 - pantry-tree: - size: 1113 - sha256: 86140298020f68bb09d07b26a6a6f1666fc3a02715d7986b09150727247a1a84 - original: - hackage: Spock-core-0.13.0.0 -- completed: - hackage: reroute-0.5.0.0@sha256:3360747cdc700c9808a38bff48b75926efa443d4af282396082329a218a8d9d3,2446 - pantry-tree: - size: 660 - sha256: 52afcff0a5dba2fb746be4fa8cfa56cf774272872b61130537fe0e7ad463c0cd - original: - hackage: reroute-0.5.0.0 +packages: [] snapshots: - completed: - size: 586296 - url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/18/18.yaml - sha256: 63539429076b7ebbab6daa7656cfb079393bf644971156dc349d7c0453694ac2 - original: lts-18.18 + sha256: adbc602422dde10cc330175da7de8609e70afc41449a7e2d6e8b1827aa0e5008 + size: 649342 + url: https://raw.githubusercontent.com/commercialhaskell/stackage-snapshots/master/lts/20/11.yaml + original: lts-20.11