Main problem is handling the correct birthday for people with Feb 29 as birthday. They
usually celebrate their birthday in leap years on Feb 29, otherwise Feb 28. Here's some
code for PostgreSQL (9.0 used)
-- A simple helper function
create or replace function getDayInYear(timestamp) returns int as $$
select floor(date_part('day', $1) +
date_part('month', $1)*100)::int;
$$ language sql;
CREATE TABLE member (
id serial not null primary key,
date_of_birth date not null,
dob int2 not null default 0
);
-- Create a suitable index
create index member_dob_idx on member(dob);
-- And a trigger to fill dob.
create or replace function computeDob() returns trigger as $$
begin
NEW.dob = getDayInYear(NEW.date_of_birth);
return NEW;
end
$$ language plpgsql;
create trigger tgDow
before insert or update
on member for each row execute procedure computeDob();
-- populate table with 5 million rows testdata with users ranging from 13 to 60 years.
insert into member (date_of_birth)
select current_date -
(floor(13*365 + random() * (47*365)) || ' days')::interval
from generate_series(1, 5000000);
analyze member;
-- makes it easier for demos
create or replace function getBirthdayMembers(timestamp) returns setof member as $$
select *
from member
where dob between
getDayInYear($1 + '1 day'::interval)
and
(getDayInYear($1 + '1 day'::interval) +
case
-- called on Feb 27 in a leap year
when getDayInYear($1) = 227
and getDayInYear($1 + '2 days'::interval) = 229
then 0
-- called on Feb 27 in a non-leap year
when getDayInYear($1) = 227
and getDayInYear($1 + '2 days'::interval) > 229
then 1
else 0
end
)
$$ language sql;
-- get all members having birthday tomorrow (first parameter is called date)
explain analyze select * from getBirthdayMembers(CURRENT_DATE);
explain analyze select * from getBirthdayMembers('2011-02-27');
explain analyze select * from getBirthdayMembers('2011-02-28');
explain analyze select * from getBirthdayMembers('2012-02-27');
explain analyze select * from getBirthdayMembers('2012-02-28');
Performance is good, even on my old desktop machine I get response times of ~100 msec for 5 mio rows. However, 5 mio
is small enough to fit in memory, it would become more interesting with 500 mio members (but I do not have enough disk space on this machine to test).
Weekly jobs would be similar, however there would be more corner cases to test (I guess so). Nothing too complicated.
answered
Feb 09 '11 at 08:54
Neutrino
71●1●1●5
edited
Feb 09 '11 at 12:05
Markus Winand ♦♦
936●5●11●20