Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -34,6 +34,9 @@ def filterfunc(x: dict) -> bool:
|
|
34 |
if len(x.get("text", "").split()) < 100:
|
35 |
return False
|
36 |
|
|
|
|
|
|
|
37 |
excluded = {"Promotional/Advertisement", "Machine-Generated", "Images/Videos/Audio",
|
38 |
"Truncated", "Spam/Ads", "Product Page", "Content Listing"}
|
39 |
|
@@ -42,31 +45,6 @@ def filterfunc(x: dict) -> bool:
|
|
42 |
if label := x.get("eai_taxonomy", {}).get(version, {}).get(level, {}).get("label"):
|
43 |
if label in excluded:
|
44 |
return False
|
45 |
-
|
46 |
-
# FDC pairing: Medicine (61) with another science code - widened scope
|
47 |
-
dds_primary = x.get("eai_taxonomy", {}).get("dds", {}).get("primary", {}).get("label", "")
|
48 |
-
dds_secondary = x.get("eai_taxonomy", {}).get("dds", {}).get("secondary", {}).get("label", "")
|
49 |
-
|
50 |
-
# Debug: Log some DDS codes to see what's available
|
51 |
-
if dds_primary or dds_secondary:
|
52 |
-
logger.debug(f"DDS codes found - Primary: {dds_primary}, Secondary: {dds_secondary}")
|
53 |
-
|
54 |
-
# Widened FDC filter: Accept if ANY DDS code starts with medical/science prefixes
|
55 |
-
fdc_paired = (
|
56 |
-
# Original strict pairing
|
57 |
-
(prefix(dds_primary) in FDC_KEEP and prefix(dds_secondary) in SCIENCE_CODES) or
|
58 |
-
(prefix(dds_secondary) in FDC_KEEP and prefix(dds_primary) in SCIENCE_CODES) or
|
59 |
-
# Widened: Accept if primary OR secondary has medical code
|
60 |
-
prefix(dds_primary) in FDC_KEEP or
|
61 |
-
prefix(dds_secondary) in FDC_KEEP or
|
62 |
-
# Even wider: Accept if ANY DDS code is in science codes
|
63 |
-
prefix(dds_primary) in SCIENCE_CODES or
|
64 |
-
prefix(dds_secondary) in SCIENCE_CODES
|
65 |
-
)
|
66 |
-
|
67 |
-
if not fdc_paired:
|
68 |
-
return False
|
69 |
-
|
70 |
return True
|
71 |
|
72 |
|
|
|
34 |
if len(x.get("text", "").split()) < 100:
|
35 |
return False
|
36 |
|
37 |
+
if x.get("eai_taxonomy", {}).get("free_decimal_correspondence", {}).get("primary", {}).get("code", "")[:2] != "61":
|
38 |
+
return False
|
39 |
+
|
40 |
excluded = {"Promotional/Advertisement", "Machine-Generated", "Images/Videos/Audio",
|
41 |
"Truncated", "Spam/Ads", "Product Page", "Content Listing"}
|
42 |
|
|
|
45 |
if label := x.get("eai_taxonomy", {}).get(version, {}).get(level, {}).get("label"):
|
46 |
if label in excluded:
|
47 |
return False
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
48 |
return True
|
49 |
|
50 |
|